From 9ec979f382403595f80a7f58c207b80c9b6e7e2f Mon Sep 17 00:00:00 2001 From: Tom Wilkie Date: Tue, 2 May 2017 12:50:43 +0100 Subject: [PATCH 001/660] Bazel-ify Cortex (#342) * Move proto to ingester/client. * Move build to build-image * Add BUILD files for bazel * Move all non-main packages under pkg/ * Update circle.yml for the new build-image * Update configs-itegration-test * Documentation for the bazel builds. --- BUILD | 80 ++++++ aws_storage_client.go | 567 +++++++++++++++++++++++++++++++++++++ aws_storage_client_test.go | 419 +++++++++++++++++++++++++++ by_key.go | 114 ++++++++ by_key_test.go | 99 +++++++ chunk.go | 320 +++++++++++++++++++++ chunk_cache.go | 219 ++++++++++++++ chunk_cache_test.go | 114 ++++++++ chunk_store.go | 526 ++++++++++++++++++++++++++++++++++ chunk_store_test.go | 424 +++++++++++++++++++++++++++ chunk_test.go | 123 ++++++++ inmemory_storage_client.go | 280 ++++++++++++++++++ memcache_client.go | 106 +++++++ schema.go | 538 +++++++++++++++++++++++++++++++++++ schema_config.go | 329 +++++++++++++++++++++ schema_config_test.go | 321 +++++++++++++++++++++ schema_test.go | 468 ++++++++++++++++++++++++++++++ schema_util.go | 142 ++++++++++ schema_util_test.go | 85 ++++++ storage_client.go | 64 +++++ table_manager.go | 342 ++++++++++++++++++++++ table_manager_test.go | 178 ++++++++++++ 22 files changed, 5858 insertions(+) create mode 100644 BUILD create mode 100644 aws_storage_client.go create mode 100644 aws_storage_client_test.go create mode 100644 by_key.go create mode 100644 by_key_test.go create mode 100644 chunk.go create mode 100644 chunk_cache.go create mode 100644 chunk_cache_test.go create mode 100644 chunk_store.go create mode 100644 chunk_store_test.go create mode 100644 chunk_test.go create mode 100644 inmemory_storage_client.go create mode 100644 memcache_client.go create mode 100644 schema.go create mode 100644 schema_config.go create mode 100644 schema_config_test.go create mode 100644 schema_test.go create mode 100644 schema_util.go create mode 100644 schema_util_test.go create mode 100644 storage_client.go create mode 100644 table_manager.go create mode 100644 table_manager_test.go diff --git a/BUILD b/BUILD new file mode 100644 index 0000000000000..646f2a670ede6 --- /dev/null +++ b/BUILD @@ -0,0 +1,80 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") + +go_library( + name = "go_default_library", + srcs = [ + "aws_storage_client.go", + "by_key.go", + "chunk.go", + "chunk_cache.go", + "chunk_store.go", + "inmemory_storage_client.go", + "memcache_client.go", + "schema.go", + "schema_config.go", + "schema_util.go", + "storage_client.go", + "table_manager.go", + ], + visibility = ["//visibility:public"], + deps = [ + "//pkg/util:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/aws:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/aws/awserr:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/aws/credentials:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/aws/request:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/aws/session:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/service/dynamodb:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/service/s3:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/service/s3/s3iface:go_default_library", + "//vendor/github.com/bradfitz/gomemcache/memcache:go_default_library", + "//vendor/github.com/golang/snappy:go_default_library", + 
"//vendor/github.com/prometheus/client_golang/prometheus:go_default_library", + "//vendor/github.com/prometheus/common/log:go_default_library", + "//vendor/github.com/prometheus/common/model:go_default_library", + "//vendor/github.com/prometheus/prometheus/promql:go_default_library", + "//vendor/github.com/prometheus/prometheus/storage/local/chunk:go_default_library", + "//vendor/github.com/prometheus/prometheus/storage/metric:go_default_library", + "//vendor/github.com/weaveworks/common/errors:go_default_library", + "//vendor/github.com/weaveworks/common/instrument:go_default_library", + "//vendor/github.com/weaveworks/common/mtime:go_default_library", + "//vendor/github.com/weaveworks/common/user:go_default_library", + "//vendor/golang.org/x/net/context:go_default_library", + ], +) + +go_test( + name = "go_default_test", + srcs = [ + "aws_storage_client_test.go", + "by_key_test.go", + "chunk_cache_test.go", + "chunk_store_test.go", + "chunk_test.go", + "schema_config_test.go", + "schema_test.go", + "schema_util_test.go", + "table_manager_test.go", + ], + library = ":go_default_library", + deps = [ + "//pkg/util:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/aws:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/aws/awserr:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/aws/request:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/service/dynamodb:go_default_library", + "//vendor/github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface:go_default_library", + "//vendor/github.com/bradfitz/gomemcache/memcache:go_default_library", + "//vendor/github.com/prometheus/common/log:go_default_library", + "//vendor/github.com/prometheus/common/model:go_default_library", + "//vendor/github.com/prometheus/prometheus/storage/local/chunk:go_default_library", + "//vendor/github.com/prometheus/prometheus/storage/metric:go_default_library", + "//vendor/github.com/stretchr/testify/assert:go_default_library", + "//vendor/github.com/stretchr/testify/require:go_default_library", + "//vendor/github.com/weaveworks/common/mtime:go_default_library", + "//vendor/github.com/weaveworks/common/test:go_default_library", + "//vendor/github.com/weaveworks/common/user:go_default_library", + "//vendor/golang.org/x/net/context:go_default_library", + ], +) diff --git a/aws_storage_client.go b/aws_storage_client.go new file mode 100644 index 0000000000000..a0c4c3afa58c1 --- /dev/null +++ b/aws_storage_client.go @@ -0,0 +1,567 @@ +package chunk + +import ( + "bytes" + "flag" + "fmt" + "io/ioutil" + "math/rand" + "net/url" + "strings" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/dynamodb" + "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" + "github.com/aws/aws-sdk-go/service/s3" + "github.com/aws/aws-sdk-go/service/s3/s3iface" + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/net/context" + + "github.com/weaveworks/common/instrument" + "github.com/weaveworks/cortex/pkg/util" +) + +const ( + hashKey = "h" + rangeKey = "r" + valueKey = "c" + + // For dynamodb errors + tableNameLabel = "table" + errorReasonLabel = "error" + otherError = "other" + + provisionedThroughputExceededException = "ProvisionedThroughputExceededException" + + // Backoff for dynamoDB requests, to match AWS lib - see: + // 
https://github.com/aws/aws-sdk-go/blob/master/service/dynamodb/customizations.go + minBackoff = 50 * time.Millisecond + maxBackoff = 50 * time.Second + maxRetries = 20 + + // See http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html. + dynamoMaxBatchSize = 25 +) + +var ( + dynamoRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "cortex", + Name: "dynamo_request_duration_seconds", + Help: "Time spent doing DynamoDB requests.", + + // DynamoDB latency seems to range from a few ms to a few sec and is + // important. So use 8 buckets from 64us to 8s. + Buckets: prometheus.ExponentialBuckets(0.000128, 4, 8), + }, []string{"operation", "status_code"}) + dynamoConsumedCapacity = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "dynamo_consumed_capacity_total", + Help: "The capacity units consumed by operation.", + }, []string{"operation"}) + dynamoFailures = prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "dynamo_failures_total", + Help: "The total number of errors while storing chunks to the chunk store.", + }, []string{tableNameLabel, errorReasonLabel}) + s3RequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "cortex", + Name: "s3_request_duration_seconds", + Help: "Time spent doing S3 requests.", + Buckets: []float64{.025, .05, .1, .25, .5, 1, 2}, + }, []string{"operation", "status_code"}) +) + +func init() { + prometheus.MustRegister(dynamoRequestDuration) + prometheus.MustRegister(dynamoConsumedCapacity) + prometheus.MustRegister(dynamoFailures) + prometheus.MustRegister(s3RequestDuration) +} + +// DynamoDBConfig specifies config for a DynamoDB database. +type DynamoDBConfig struct { + DynamoDB util.URLValue +} + +// RegisterFlags adds the flags required to config this to the given FlagSet +func (cfg *DynamoDBConfig) RegisterFlags(f *flag.FlagSet) { + f.Var(&cfg.DynamoDB, "dynamodb.url", "DynamoDB endpoint URL with escaped Key and Secret encoded. "+ + "If only region is specified as a host, proper endpoint will be deduced. Use inmemory:/// to use a mock in-memory implementation.") +} + +// AWSStorageConfig specifies config for storing data on AWS. +type AWSStorageConfig struct { + DynamoDBConfig + S3 util.URLValue +} + +// RegisterFlags adds the flags required to config this to the given FlagSet +func (cfg *AWSStorageConfig) RegisterFlags(f *flag.FlagSet) { + cfg.DynamoDBConfig.RegisterFlags(f) + f.Var(&cfg.S3, "s3.url", "S3 endpoint URL with escaped Key and Secret encoded. "+ + "If only region is specified as a host, proper endpoint will be deduced. Use inmemory:/// to use a mock in-memory implementation.") +} + +type awsStorageClient struct { + DynamoDB dynamodbiface.DynamoDBAPI + S3 s3iface.S3API + bucketName string + + // queryRequestFn exists for mocking, so we don't have to write a whole load + // of boilerplate. + queryRequestFn func(ctx context.Context, input *dynamodb.QueryInput) dynamoDBRequest +} + +// NewAWSStorageClient makes a new AWS-backed StorageClient. 
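A minimal sketch of how the flags and constructor above fit together, assuming placeholder URLs and a hypothetical helper name; none of this is part of the patch itself:

func exampleNewAWSStorageClient() (StorageClient, error) {
	var cfg AWSStorageConfig
	fs := flag.NewFlagSet("example", flag.ContinueOnError)
	cfg.RegisterFlags(fs)
	// Region-only hosts let the SDK derive the real endpoint; hosts containing
	// a "." are used verbatim (see awsConfigFromURL below).
	if err := fs.Parse([]string{
		"-dynamodb.url=dynamodb://key:secret@us-east-1",
		"-s3.url=s3://key:secret@us-east-1/example-chunk-bucket",
	}); err != nil {
		return nil, err
	}
	return NewAWSStorageClient(cfg)
}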
+func NewAWSStorageClient(cfg AWSStorageConfig) (StorageClient, error) { + dynamoDB, err := dynamoClientFromURL(cfg.DynamoDB.URL) + if err != nil { + return nil, err + } + + if cfg.S3.URL == nil { + return nil, fmt.Errorf("no URL specified for S3") + } + s3Config, err := awsConfigFromURL(cfg.S3.URL) + if err != nil { + return nil, err + } + s3Client := s3.New(session.New(s3Config)) + bucketName := strings.TrimPrefix(cfg.S3.URL.Path, "/") + + storageClient := awsStorageClient{ + DynamoDB: dynamoDB, + S3: s3Client, + bucketName: bucketName, + } + storageClient.queryRequestFn = storageClient.queryRequest + return storageClient, nil +} + +func (a awsStorageClient) NewWriteBatch() WriteBatch { + return dynamoDBWriteBatch(map[string][]*dynamodb.WriteRequest{}) +} + +// batchWrite writes requests to the underlying storage, handling retires and backoff. +func (a awsStorageClient) BatchWrite(ctx context.Context, input WriteBatch) error { + outstanding := input.(dynamoDBWriteBatch) + unprocessed := map[string][]*dynamodb.WriteRequest{} + backoff, numRetries := minBackoff, 0 + for dictLen(outstanding)+dictLen(unprocessed) > 0 && numRetries < maxRetries { + reqs := map[string][]*dynamodb.WriteRequest{} + takeReqs(unprocessed, reqs, dynamoMaxBatchSize) + takeReqs(outstanding, reqs, dynamoMaxBatchSize) + var resp *dynamodb.BatchWriteItemOutput + + err := instrument.TimeRequestHistogram(ctx, "DynamoDB.BatchWriteItem", dynamoRequestDuration, func(ctx context.Context) error { + var err error + resp, err = a.DynamoDB.BatchWriteItemWithContext(ctx, &dynamodb.BatchWriteItemInput{ + RequestItems: reqs, + ReturnConsumedCapacity: aws.String(dynamodb.ReturnConsumedCapacityTotal), + }) + return err + }) + for _, cc := range resp.ConsumedCapacity { + dynamoConsumedCapacity.WithLabelValues("DynamoDB.BatchWriteItem"). + Add(float64(*cc.CapacityUnits)) + } + + if err != nil { + for tableName := range reqs { + recordDynamoError(tableName, err) + } + } + + // If there are unprocessed items, backoff and retry those items. + if unprocessedItems := resp.UnprocessedItems; unprocessedItems != nil && dictLen(unprocessedItems) > 0 { + takeReqs(unprocessedItems, unprocessed, -1) + time.Sleep(backoff) + backoff = nextBackoff(backoff) + continue + } + + // If we get provisionedThroughputExceededException, then no items were processed, + // so back off and retry all. + if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == provisionedThroughputExceededException { + takeReqs(reqs, unprocessed, -1) + time.Sleep(backoff) + backoff = nextBackoff(backoff) + numRetries++ + continue + } + + // All other errors are fatal. 
+ if err != nil { + return err + } + + backoff = minBackoff + numRetries = 0 + } + + if valuesLeft := dictLen(outstanding) + dictLen(unprocessed); valuesLeft > 0 { + return fmt.Errorf("failed to write chunk after %d retries, %d values remaining", numRetries, valuesLeft) + } + return nil +} + +func (a awsStorageClient) QueryPages(ctx context.Context, query IndexQuery, callback func(result ReadBatch, lastPage bool) (shouldContinue bool)) error { + input := &dynamodb.QueryInput{ + TableName: aws.String(query.TableName), + KeyConditions: map[string]*dynamodb.Condition{ + hashKey: { + AttributeValueList: []*dynamodb.AttributeValue{ + {S: aws.String(query.HashValue)}, + }, + ComparisonOperator: aws.String(dynamodb.ComparisonOperatorEq), + }, + }, + ReturnConsumedCapacity: aws.String(dynamodb.ReturnConsumedCapacityTotal), + } + + if query.RangeValuePrefix != nil { + input.KeyConditions[rangeKey] = &dynamodb.Condition{ + AttributeValueList: []*dynamodb.AttributeValue{ + {B: query.RangeValuePrefix}, + }, + ComparisonOperator: aws.String(dynamodb.ComparisonOperatorBeginsWith), + } + } else if query.RangeValueStart != nil { + input.KeyConditions[rangeKey] = &dynamodb.Condition{ + AttributeValueList: []*dynamodb.AttributeValue{ + {B: query.RangeValueStart}, + }, + ComparisonOperator: aws.String(dynamodb.ComparisonOperatorGe), + } + } + + // Filters + if query.ValueEqual != nil { + input.FilterExpression = aws.String(fmt.Sprintf("%s = :v", valueKey)) + input.ExpressionAttributeValues = map[string]*dynamodb.AttributeValue{ + ":v": { + B: query.ValueEqual, + }, + } + } + + request := a.queryRequestFn(ctx, input) + backoff := minBackoff + for page := request; page != nil; page = page.NextPage() { + err := instrument.TimeRequestHistogram(ctx, "DynamoDB.QueryPages", dynamoRequestDuration, func(_ context.Context) error { + return page.Send() + }) + + if cc := page.Data().(*dynamodb.QueryOutput).ConsumedCapacity; cc != nil { + dynamoConsumedCapacity.WithLabelValues("DynamoDB.QueryPages"). 
+ Add(float64(*cc.CapacityUnits)) + } + + if err != nil { + recordDynamoError(*input.TableName, err) + + if awsErr, ok := err.(awserr.Error); ok && awsErr.Code() == provisionedThroughputExceededException { + time.Sleep(backoff) + backoff = nextBackoff(backoff) + continue + } + + return fmt.Errorf("QueryPages error: table=%v, err=%v", *input.TableName, err) + } + + queryOutput := page.Data().(*dynamodb.QueryOutput) + if getNextPage := callback(dynamoDBReadBatch(queryOutput.Items), !page.HasNextPage()); !getNextPage { + if err != nil { + return fmt.Errorf("QueryPages error: table=%v, err=%v", *input.TableName, page.Error()) + } + return nil + } + + backoff = minBackoff + } + + return nil +} + +type dynamoDBRequest interface { + NextPage() dynamoDBRequest + Send() error + Data() interface{} + Error() error + HasNextPage() bool +} + +func (a awsStorageClient) queryRequest(ctx context.Context, input *dynamodb.QueryInput) dynamoDBRequest { + req, _ := a.DynamoDB.QueryRequest(input) + req.SetContext(ctx) + return dynamoDBRequestAdapter{req} +} + +type dynamoDBRequestAdapter struct { + request *request.Request +} + +func (a dynamoDBRequestAdapter) NextPage() dynamoDBRequest { + next := a.request.NextPage() + if next == nil { + return nil + } + return dynamoDBRequestAdapter{next} +} + +func (a dynamoDBRequestAdapter) Data() interface{} { + return a.request.Data +} + +func (a dynamoDBRequestAdapter) Send() error { + return a.request.Send() +} + +func (a dynamoDBRequestAdapter) Error() error { + return a.request.Error +} + +func (a dynamoDBRequestAdapter) HasNextPage() bool { + return a.request.HasNextPage() +} + +func (a awsStorageClient) GetChunk(ctx context.Context, key string) ([]byte, error) { + var resp *s3.GetObjectOutput + err := instrument.TimeRequestHistogram(ctx, "S3.GetObject", s3RequestDuration, func(ctx context.Context) error { + var err error + resp, err = a.S3.GetObjectWithContext(ctx, &s3.GetObjectInput{ + Bucket: aws.String(a.bucketName), + Key: aws.String(key), + }) + return err + }) + if err != nil { + return nil, err + } + defer resp.Body.Close() + buf, err := ioutil.ReadAll(resp.Body) + if err != nil { + return nil, err + } + return buf, nil +} + +func (a awsStorageClient) PutChunk(ctx context.Context, key string, buf []byte) error { + return instrument.TimeRequestHistogram(ctx, "S3.PutObject", s3RequestDuration, func(ctx context.Context) error { + _, err := a.S3.PutObjectWithContext(ctx, &s3.PutObjectInput{ + Body: bytes.NewReader(buf), + Bucket: aws.String(a.bucketName), + Key: aws.String(key), + }) + return err + }) +} + +type dynamoDBWriteBatch map[string][]*dynamodb.WriteRequest + +func (b dynamoDBWriteBatch) Add(tableName, hashValue string, rangeValue []byte, value []byte) { + item := map[string]*dynamodb.AttributeValue{ + hashKey: {S: aws.String(hashValue)}, + rangeKey: {B: rangeValue}, + } + + if value != nil { + item[valueKey] = &dynamodb.AttributeValue{B: value} + } + + b[tableName] = append(b[tableName], &dynamodb.WriteRequest{ + PutRequest: &dynamodb.PutRequest{ + Item: item, + }, + }) +} + +type dynamoDBReadBatch []map[string]*dynamodb.AttributeValue + +func (b dynamoDBReadBatch) Len() int { + return len(b) +} + +func (b dynamoDBReadBatch) RangeValue(i int) []byte { + return b[i][rangeKey].B +} + +func (b dynamoDBReadBatch) Value(i int) []byte { + chunkValue, ok := b[i][valueKey] + if !ok { + return nil + } + return chunkValue.B +} + +type dynamoTableClient struct { + DynamoDB dynamodbiface.DynamoDBAPI +} + +// newDynamoTableClient makes a new DynamoTableClient. 
+func newDynamoTableClient(cfg DynamoDBConfig) (DynamoTableClient, error) { + dynamoDB, err := dynamoClientFromURL(cfg.DynamoDB.URL) + if err != nil { + return nil, err + } + return dynamoTableClient{ + DynamoDB: dynamoDB, + }, nil +} + +func (d dynamoTableClient) ListTables(ctx context.Context) ([]string, error) { + table := []string{} + err := instrument.TimeRequestHistogram(ctx, "DynamoDB.ListTablesPages", dynamoRequestDuration, func(_ context.Context) error { + return d.DynamoDB.ListTablesPages(&dynamodb.ListTablesInput{}, func(resp *dynamodb.ListTablesOutput, _ bool) bool { + for _, s := range resp.TableNames { + table = append(table, *s) + } + return true + }) + }) + return table, err +} + +func (d dynamoTableClient) CreateTable(ctx context.Context, name string, readCapacity, writeCapacity int64) error { + return instrument.TimeRequestHistogram(ctx, "DynamoDB.CreateTable", dynamoRequestDuration, func(_ context.Context) error { + input := &dynamodb.CreateTableInput{ + TableName: aws.String(name), + AttributeDefinitions: []*dynamodb.AttributeDefinition{ + { + AttributeName: aws.String(hashKey), + AttributeType: aws.String(dynamodb.ScalarAttributeTypeS), + }, + { + AttributeName: aws.String(rangeKey), + AttributeType: aws.String(dynamodb.ScalarAttributeTypeB), + }, + }, + KeySchema: []*dynamodb.KeySchemaElement{ + { + AttributeName: aws.String(hashKey), + KeyType: aws.String(dynamodb.KeyTypeHash), + }, + { + AttributeName: aws.String(rangeKey), + KeyType: aws.String(dynamodb.KeyTypeRange), + }, + }, + ProvisionedThroughput: &dynamodb.ProvisionedThroughput{ + ReadCapacityUnits: aws.Int64(readCapacity), + WriteCapacityUnits: aws.Int64(writeCapacity), + }, + } + _, err := d.DynamoDB.CreateTable(input) + return err + }) +} + +func (d dynamoTableClient) DescribeTable(ctx context.Context, name string) (readCapacity, writeCapacity int64, status string, err error) { + var out *dynamodb.DescribeTableOutput + instrument.TimeRequestHistogram(ctx, "DynamoDB.DescribeTable", dynamoRequestDuration, func(_ context.Context) error { + out, err = d.DynamoDB.DescribeTable(&dynamodb.DescribeTableInput{ + TableName: aws.String(name), + }) + readCapacity = *out.Table.ProvisionedThroughput.ReadCapacityUnits + writeCapacity = *out.Table.ProvisionedThroughput.WriteCapacityUnits + status = *out.Table.TableStatus + return err + }) + return +} + +func (d dynamoTableClient) UpdateTable(ctx context.Context, name string, readCapacity, writeCapacity int64) error { + return instrument.TimeRequestHistogram(ctx, "DynamoDB.UpdateTable", dynamoRequestDuration, func(_ context.Context) error { + _, err := d.DynamoDB.UpdateTable(&dynamodb.UpdateTableInput{ + TableName: aws.String(name), + ProvisionedThroughput: &dynamodb.ProvisionedThroughput{ + ReadCapacityUnits: aws.Int64(readCapacity), + WriteCapacityUnits: aws.Int64(writeCapacity), + }, + }) + return err + }) +} + +func nextBackoff(lastBackoff time.Duration) time.Duration { + // Based on the "Decorrelated Jitter" approach from https://www.awsarchitectureblog.com/2015/03/backoff.html + // sleep = min(cap, random_between(base, sleep * 3)) + backoff := minBackoff + time.Duration(rand.Int63n(int64((lastBackoff*3)-minBackoff))) + if backoff > maxBackoff { + backoff = maxBackoff + } + return backoff +} + +func recordDynamoError(tableName string, err error) { + if awsErr, ok := err.(awserr.Error); ok { + dynamoFailures.WithLabelValues(tableName, awsErr.Code()).Add(float64(1)) + } else { + dynamoFailures.WithLabelValues(tableName, otherError).Add(float64(1)) + } +} + +func 
dictLen(b map[string][]*dynamodb.WriteRequest) int { + result := 0 + for _, reqs := range b { + result += len(reqs) + } + return result +} + +// Fill 'to' with WriteRequests from 'from' until 'to' has at most max requests. Remove those requests from 'from'. +func takeReqs(from, to map[string][]*dynamodb.WriteRequest, max int) { + outLen, inLen := dictLen(to), dictLen(from) + toFill := inLen + if max > 0 { + toFill = util.Min(inLen, max-outLen) + } + for toFill > 0 { + for tableName, fromReqs := range from { + taken := util.Min(len(fromReqs), toFill) + if taken > 0 { + to[tableName] = append(to[tableName], fromReqs[:taken]...) + from[tableName] = fromReqs[taken:] + toFill -= taken + } + } + } +} + +// dynamoClientFromURL creates a new DynamoDB client from a URL. +func dynamoClientFromURL(awsURL *url.URL) (dynamodbiface.DynamoDBAPI, error) { + if awsURL == nil { + return nil, fmt.Errorf("no URL specified for DynamoDB") + } + config, err := awsConfigFromURL(awsURL) + if err != nil { + return nil, err + } + return dynamodb.New(session.New(config)), nil +} + +// awsConfigFromURL returns AWS config from given URL. It expects escaped AWS Access key ID & Secret Access Key to be +// encoded in the URL. It also expects region specified as a host (letting AWS generate full endpoint) or fully valid +// endpoint with dummy region assumed (e.g for URLs to emulated services). +func awsConfigFromURL(awsURL *url.URL) (*aws.Config, error) { + if awsURL.User == nil { + return nil, fmt.Errorf("must specify escaped Access Key & Secret Access in URL") + } + + password, _ := awsURL.User.Password() + creds := credentials.NewStaticCredentials(awsURL.User.Username(), password, "") + config := aws.NewConfig(). + WithCredentials(creds). + WithMaxRetries(0) // We do our own retries, so we can monitor them + if strings.Contains(awsURL.Host, ".") { + return config.WithEndpoint(fmt.Sprintf("http://%s", awsURL.Host)).WithRegion("dummy"), nil + } + + // Let AWS generate default endpoint based on region passed as a host in URL. 
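	// For reference, the two accepted URL forms (also exercised by
	// TestAWSConfigFromURL below) resolve as follows:
	//
	//	s3://abc:123@s3.default.svc.cluster.local:4569 -> endpoint http://s3.default.svc.cluster.local:4569, region "dummy"
	//	s3://key:secret@eu-west-2/bucket1              -> region "eu-west-2", endpoint generated by the SDK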
+ return config.WithRegion(awsURL.Host), nil +} diff --git a/aws_storage_client_test.go b/aws_storage_client_test.go new file mode 100644 index 0000000000000..640582b1b8ff0 --- /dev/null +++ b/aws_storage_client_test.go @@ -0,0 +1,419 @@ +package chunk + +import ( + "bytes" + "fmt" + "net/url" + "sort" + "sync" + "testing" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/aws-sdk-go/aws/request" + "github.com/aws/aws-sdk-go/service/dynamodb" + "github.com/aws/aws-sdk-go/service/dynamodb/dynamodbiface" + "github.com/prometheus/common/log" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/net/context" +) + +type mockDynamoDBClient struct { + dynamodbiface.DynamoDBAPI + + mtx sync.RWMutex + unprocessed int + provisionedErr int + tables map[string]*mockDynamoDBTable +} + +type mockDynamoDBTable struct { + items map[string][]mockDynamoDBItem +} + +type mockDynamoDBItem map[string]*dynamodb.AttributeValue + +func newMockDynamoDB(unprocessed int, provisionedErr int) *mockDynamoDBClient { + return &mockDynamoDBClient{ + tables: map[string]*mockDynamoDBTable{}, + unprocessed: unprocessed, + provisionedErr: provisionedErr, + } +} + +func (m *mockDynamoDBClient) createTable(name string) { + m.mtx.Lock() + defer m.mtx.Unlock() + m.tables[name] = &mockDynamoDBTable{ + items: map[string][]mockDynamoDBItem{}, + } +} + +func (m *mockDynamoDBClient) BatchWriteItemWithContext(_ aws.Context, input *dynamodb.BatchWriteItemInput, _ ...request.Option) (*dynamodb.BatchWriteItemOutput, error) { + m.mtx.Lock() + defer m.mtx.Unlock() + + resp := &dynamodb.BatchWriteItemOutput{ + UnprocessedItems: map[string][]*dynamodb.WriteRequest{}, + } + + if m.provisionedErr > 0 { + m.provisionedErr-- + return resp, awserr.New(provisionedThroughputExceededException, "", nil) + } + + for tableName, writeRequests := range input.RequestItems { + table, ok := m.tables[tableName] + if !ok { + return &dynamodb.BatchWriteItemOutput{}, fmt.Errorf("table not found") + } + + for _, writeRequest := range writeRequests { + if m.unprocessed > 0 { + m.unprocessed-- + resp.UnprocessedItems[tableName] = append(resp.UnprocessedItems[tableName], writeRequest) + continue + } + + hashValue := *writeRequest.PutRequest.Item[hashKey].S + rangeValue := writeRequest.PutRequest.Item[rangeKey].B + + items := table.items[hashValue] + + // insert in order + i := sort.Search(len(items), func(i int) bool { + return bytes.Compare(items[i][rangeKey].B, rangeValue) >= 0 + }) + if i >= len(items) || !bytes.Equal(items[i][rangeKey].B, rangeValue) { + items = append(items, nil) + copy(items[i+1:], items[i:]) + } else { + return &dynamodb.BatchWriteItemOutput{}, fmt.Errorf("Duplicate entry") + } + items[i] = writeRequest.PutRequest.Item + + table.items[hashValue] = items + } + } + return resp, nil +} + +func (m *mockDynamoDBClient) queryRequest(_ context.Context, input *dynamodb.QueryInput) dynamoDBRequest { + result := &dynamodb.QueryOutput{ + Items: []map[string]*dynamodb.AttributeValue{}, + } + + // Required filters + hashValue := *input.KeyConditions[hashKey].AttributeValueList[0].S + + // Optional filters + var ( + rangeValueFilter []byte + rangeValueFilterType string + ) + if c, ok := input.KeyConditions[rangeKey]; ok { + rangeValueFilter = c.AttributeValueList[0].B + rangeValueFilterType = *c.ComparisonOperator + } + + // Filter by HashValue, RangeValue and Value if it exists + items := m.tables[*input.TableName].items[hashValue] + for _, item := range items { + 
rangeValue := item[rangeKey].B + if rangeValueFilterType == dynamodb.ComparisonOperatorGe && bytes.Compare(rangeValue, rangeValueFilter) < 0 { + continue + } + if rangeValueFilterType == dynamodb.ComparisonOperatorBeginsWith && !bytes.HasPrefix(rangeValue, rangeValueFilter) { + continue + } + + if item[valueKey] != nil { + value := item[valueKey].B + + // Apply filterExpression if it exists (supporting only v = :v) + if input.FilterExpression != nil { + if *input.FilterExpression == fmt.Sprintf("%s = :v", valueKey) { + filterValue := input.ExpressionAttributeValues[":v"].B + if !bytes.Equal(value, filterValue) { + continue + } + } else { + log.Warnf("Unsupported FilterExpression: %s", *input.FilterExpression) + } + } + } + + result.Items = append(result.Items, item) + } + + return &dynamoDBMockRequest{ + result: result, + } +} + +type dynamoDBMockRequest struct { + result *dynamodb.QueryOutput +} + +func (m *dynamoDBMockRequest) NextPage() dynamoDBRequest { + return m +} +func (m *dynamoDBMockRequest) Send() error { + return nil +} +func (m *dynamoDBMockRequest) Data() interface{} { + return m.result +} +func (m *dynamoDBMockRequest) Error() error { + return nil +} +func (m *dynamoDBMockRequest) HasNextPage() bool { + return false +} + +func TestDynamoDBClient(t *testing.T) { + dynamoDB := newMockDynamoDB(0, 0) + client := awsStorageClient{ + DynamoDB: dynamoDB, + queryRequestFn: dynamoDB.queryRequest, + } + batch := client.NewWriteBatch() + for i := 0; i < 30; i++ { + batch.Add("table", fmt.Sprintf("hash%d", i), []byte(fmt.Sprintf("range%d", i)), nil) + } + dynamoDB.createTable("table") + + err := client.BatchWrite(context.Background(), batch) + require.NoError(t, err) + + for i := 0; i < 30; i++ { + entry := IndexQuery{ + TableName: "table", + HashValue: fmt.Sprintf("hash%d", i), + } + var have []IndexEntry + err := client.QueryPages(context.Background(), entry, func(read ReadBatch, lastPage bool) bool { + for i := 0; i < read.Len(); i++ { + have = append(have, IndexEntry{ + RangeValue: read.RangeValue(i), + }) + } + return !lastPage + }) + require.NoError(t, err) + require.Equal(t, []IndexEntry{ + {RangeValue: []byte(fmt.Sprintf("range%d", i))}, + }, have) + } +} + +func TestDynamoDBClientQueryPages(t *testing.T) { + dynamoDB := newMockDynamoDB(0, 0) + client := awsStorageClient{ + DynamoDB: dynamoDB, + queryRequestFn: dynamoDB.queryRequest, + } + + entries := []IndexEntry{ + { + TableName: "table", + HashValue: "foo", + RangeValue: []byte("bar:1"), + Value: []byte("10"), + }, + { + TableName: "table", + HashValue: "foo", + RangeValue: []byte("bar:2"), + Value: []byte("20"), + }, + { + TableName: "table", + HashValue: "foo", + RangeValue: []byte("bar:3"), + Value: []byte("30"), + }, + { + TableName: "table", + HashValue: "foo", + RangeValue: []byte("baz:1"), + Value: []byte("10"), + }, + { + TableName: "table", + HashValue: "foo", + RangeValue: []byte("baz:2"), + Value: []byte("20"), + }, + { + TableName: "table", + HashValue: "flip", + RangeValue: []byte("bar:1"), + Value: []byte("abc"), + }, + { + TableName: "table", + HashValue: "flip", + RangeValue: []byte("bar:2"), + Value: []byte("abc"), + }, + { + TableName: "table", + HashValue: "flip", + RangeValue: []byte("bar:3"), + Value: []byte("abc"), + }, + } + + tests := []struct { + name string + query IndexQuery + want []IndexEntry + }{ + { + "check HashValue only", + IndexQuery{ + TableName: "table", + HashValue: "flip", + }, + []IndexEntry{entries[5], entries[6], entries[7]}, + }, + { + "check RangeValueStart", + IndexQuery{ + 
TableName: "table", + HashValue: "foo", + RangeValueStart: []byte("bar:2"), + }, + []IndexEntry{entries[1], entries[2], entries[3], entries[4]}, + }, + { + "check RangeValuePrefix", + IndexQuery{ + TableName: "table", + HashValue: "foo", + RangeValuePrefix: []byte("baz:"), + }, + []IndexEntry{entries[3], entries[4]}, + }, + { + "check ValueEqual", + IndexQuery{ + TableName: "table", + HashValue: "foo", + RangeValuePrefix: []byte("bar"), + ValueEqual: []byte("20"), + }, + []IndexEntry{entries[1]}, + }, + } + + batch := client.NewWriteBatch() + for _, entry := range entries { + batch.Add(entry.TableName, entry.HashValue, entry.RangeValue, entry.Value) + } + dynamoDB.createTable("table") + + err := client.BatchWrite(context.Background(), batch) + require.NoError(t, err) + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var have []IndexEntry + err := client.QueryPages(context.Background(), tt.query, func(read ReadBatch, lastPage bool) bool { + for i := 0; i < read.Len(); i++ { + have = append(have, IndexEntry{ + TableName: tt.query.TableName, + HashValue: tt.query.HashValue, + RangeValue: read.RangeValue(i), + Value: read.Value(i), + }) + } + return !lastPage + }) + require.NoError(t, err) + require.Equal(t, tt.want, have) + }) + } +} + +func TestAWSConfigFromURL(t *testing.T) { + for _, tc := range []struct { + url string + expectedKey string + expectedSecret string + expectedRegion string + expectedEp string + + expectedNotSpecifiedUserErr bool + }{ + { + "s3://abc:123@s3.default.svc.cluster.local:4569", + "abc", + "123", + "dummy", + "http://s3.default.svc.cluster.local:4569", + false, + }, + { + "dynamodb://user:pass@dynamodb.default.svc.cluster.local:8000/cortex", + "user", + "pass", + "dummy", + "http://dynamodb.default.svc.cluster.local:8000", + false, + }, + { + // Not escaped password. + "s3://abc:123/@s3.default.svc.cluster.local:4569", + "", + "", + "", + "", + true, + }, + { + // Not escaped username. + "s3://abc/:123@s3.default.svc.cluster.local:4569", + "", + "", + "", + "", + true, + }, + { + "s3://keyWithEscapedSlashAtTheEnd%2F:%24%2C%26%2C%2B%2C%27%2C%2F%2C%3A%2C%3B%2C%3D%2C%3F%2C%40@eu-west-2/bucket1", + "keyWithEscapedSlashAtTheEnd/", + "$,&,+,',/,:,;,=,?,@", + "eu-west-2", + "", + false, + }, + } { + parsedURL, err := url.Parse(tc.url) + require.NoError(t, err) + + cfg, err := awsConfigFromURL(parsedURL) + if tc.expectedNotSpecifiedUserErr { + require.Error(t, err) + continue + } + require.NoError(t, err) + + require.NotNil(t, cfg.Credentials) + val, err := cfg.Credentials.Get() + require.NoError(t, err) + + assert.Equal(t, tc.expectedKey, val.AccessKeyID) + assert.Equal(t, tc.expectedSecret, val.SecretAccessKey) + + require.NotNil(t, cfg.Region) + assert.Equal(t, tc.expectedRegion, *cfg.Region) + + if tc.expectedEp != "" { + require.NotNil(t, cfg.Endpoint) + assert.Equal(t, tc.expectedEp, *cfg.Endpoint) + } + } +} diff --git a/by_key.go b/by_key.go new file mode 100644 index 0000000000000..243ddf3aff127 --- /dev/null +++ b/by_key.go @@ -0,0 +1,114 @@ +package chunk + +// ByKey allow you to sort chunks by ID +type ByKey []Chunk + +func (cs ByKey) Len() int { return len(cs) } +func (cs ByKey) Swap(i, j int) { cs[i], cs[j] = cs[j], cs[i] } +func (cs ByKey) Less(i, j int) bool { return cs[i].externalKey() < cs[j].externalKey() } + +// unique will remove duplicates from the input. +// list must be sorted. 
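As an illustration (not part of this patch), unique and merge below both rely on their inputs already being in externalKey order, so a hypothetical caller sorts first; this sketch assumes the standard library sort package is imported:

func exampleDedupe(a, b []Chunk) ByKey {
	// Sort by external key first; unique and merge depend on that order.
	sort.Sort(ByKey(a))
	sort.Sort(ByKey(b))
	return merge(unique(a), unique(b))
}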
+func unique(cs ByKey) ByKey { + if len(cs) == 0 { + return ByKey{} + } + + result := make(ByKey, 1, len(cs)) + result[0] = cs[0] + i, j := 0, 1 + for j < len(cs) { + if result[i].externalKey() == cs[j].externalKey() { + j++ + continue + } + result = append(result, cs[j]) + i++ + j++ + } + return result +} + +// merge will merge & dedupe two lists of chunks. +// list musts be sorted and not contain dupes. +func merge(a, b ByKey) ByKey { + result := make(ByKey, 0, len(a)+len(b)) + i, j := 0, 0 + for i < len(a) && j < len(b) { + if a[i].externalKey() < b[j].externalKey() { + result = append(result, a[i]) + i++ + } else if a[i].externalKey() > b[j].externalKey() { + result = append(result, b[j]) + j++ + } else { + result = append(result, a[i]) + i++ + j++ + } + } + for ; i < len(a); i++ { + result = append(result, a[i]) + } + for ; j < len(b); j++ { + result = append(result, b[j]) + } + return result +} + +// nWayUnion will merge and dedupe n lists of chunks. +// lists must be sorted and not contain dupes. +func nWayUnion(sets []ByKey) ByKey { + l := len(sets) + switch l { + case 0: + return ByKey{} + case 1: + return sets[0] + case 2: + return merge(sets[0], sets[1]) + default: + var ( + split = l / 2 + left = nWayUnion(sets[:split]) + right = nWayUnion(sets[split:]) + ) + return nWayUnion([]ByKey{left, right}) + } +} + +// nWayIntersect will interesct n sorted lists of chunks. +func nWayIntersect(sets []ByKey) ByKey { + l := len(sets) + switch l { + case 0: + return ByKey{} + case 1: + return sets[0] + case 2: + var ( + left, right = sets[0], sets[1] + i, j = 0, 0 + result = []Chunk{} + ) + for i < len(left) && j < len(right) { + if left[i].externalKey() == right[j].externalKey() { + result = append(result, left[i]) + } + + if left[i].externalKey() < right[j].externalKey() { + i++ + } else { + j++ + } + } + return result + default: + var ( + split = l / 2 + left = nWayIntersect(sets[:split]) + right = nWayIntersect(sets[split:]) + ) + return nWayIntersect([]ByKey{left, right}) + } +} diff --git a/by_key_test.go b/by_key_test.go new file mode 100644 index 0000000000000..0e3a1bb3823df --- /dev/null +++ b/by_key_test.go @@ -0,0 +1,99 @@ +package chunk + +import ( + "reflect" + "testing" + + "github.com/stretchr/testify/assert" +) + +func c(id string) Chunk { + return Chunk{UserID: id} +} + +func TestUnique(t *testing.T) { + for _, tc := range []struct { + in ByKey + want ByKey + }{ + {nil, ByKey{}}, + {ByKey{c("a"), c("a")}, ByKey{c("a")}}, + {ByKey{c("a"), c("a"), c("b"), c("b"), c("c")}, ByKey{c("a"), c("b"), c("c")}}, + {ByKey{c("a"), c("b"), c("c")}, ByKey{c("a"), c("b"), c("c")}}, + } { + have := unique(tc.in) + if !reflect.DeepEqual(tc.want, have) { + assert.Equal(t, tc.want, have) + } + } +} + +func TestMerge(t *testing.T) { + type args struct { + a ByKey + b ByKey + } + for _, tc := range []struct { + args args + want ByKey + }{ + {args{ByKey{}, ByKey{}}, ByKey{}}, + {args{ByKey{c("a")}, ByKey{}}, ByKey{c("a")}}, + {args{ByKey{}, ByKey{c("b")}}, ByKey{c("b")}}, + {args{ByKey{c("a")}, ByKey{c("b")}}, ByKey{c("a"), c("b")}}, + { + args{ByKey{c("a"), c("c")}, ByKey{c("a"), c("b"), c("d")}}, + ByKey{c("a"), c("b"), c("c"), c("d")}, + }, + } { + have := merge(tc.args.a, tc.args.b) + if !reflect.DeepEqual(tc.want, have) { + assert.Equal(t, tc.want, have) + } + } +} + +func TestNWayUnion(t *testing.T) { + for _, tc := range []struct { + in []ByKey + want ByKey + }{ + {nil, ByKey{}}, + {[]ByKey{{c("a")}}, ByKey{c("a")}}, + {[]ByKey{{c("a")}, {c("a")}}, ByKey{c("a")}}, + {[]ByKey{{c("a")}, {}}, 
ByKey{c("a")}}, + {[]ByKey{{}, {c("b")}}, ByKey{c("b")}}, + {[]ByKey{{c("a")}, {c("b")}}, ByKey{c("a"), c("b")}}, + { + []ByKey{{c("a"), c("c"), c("e")}, {c("c"), c("d")}, {c("b")}}, + ByKey{c("a"), c("b"), c("c"), c("d"), c("e")}, + }, + { + []ByKey{{c("c"), c("d")}, {c("b")}, {c("a"), c("c"), c("e")}}, + ByKey{c("a"), c("b"), c("c"), c("d"), c("e")}, + }, + } { + have := nWayUnion(tc.in) + if !reflect.DeepEqual(tc.want, have) { + assert.Equal(t, tc.want, have) + } + } +} + +func TestNWayIntersect(t *testing.T) { + for _, tc := range []struct { + in []ByKey + want ByKey + }{ + {nil, ByKey{}}, + {[]ByKey{{c("a"), c("b"), c("c")}}, []Chunk{c("a"), c("b"), c("c")}}, + {[]ByKey{{c("a"), c("b"), c("c")}, {c("a"), c("c")}}, ByKey{c("a"), c("c")}}, + {[]ByKey{{c("a"), c("b"), c("c")}, {c("a"), c("c")}, {c("b")}}, ByKey{}}, + {[]ByKey{{c("a"), c("b"), c("c")}, {c("a"), c("c")}, {c("a")}}, ByKey{c("a")}}, + } { + have := nWayIntersect(tc.in) + if !reflect.DeepEqual(tc.want, have) { + assert.Equal(t, tc.want, have) + } + } +} diff --git a/chunk.go b/chunk.go new file mode 100644 index 0000000000000..d331f165296ef --- /dev/null +++ b/chunk.go @@ -0,0 +1,320 @@ +package chunk + +import ( + "bytes" + "encoding/binary" + "encoding/json" + "fmt" + "hash/crc32" + "io" + "strconv" + "strings" + + "github.com/golang/snappy" + "github.com/prometheus/common/model" + prom_chunk "github.com/prometheus/prometheus/storage/local/chunk" + + "github.com/weaveworks/common/errors" + "github.com/weaveworks/cortex/pkg/util" +) + +// Errors that decode can return +const ( + ErrInvalidChunkID = errors.Error("invalid chunk ID") + ErrInvalidChecksum = errors.Error("invalid chunk checksum") + ErrWrongMetadata = errors.Error("wrong chunk metadata") +) + +var castagnoliTable = crc32.MakeTable(crc32.Castagnoli) + +// Chunk contains encoded timeseries data +type Chunk struct { + // These two fields will be missing from older chunks (as will the hash). + // On fetch we will initialise these fields from the DynamoDB key. + Fingerprint model.Fingerprint `json:"fingerprint"` + UserID string `json:"userID"` + + // These fields will be in all chunks, including old ones. + From model.Time `json:"from"` + Through model.Time `json:"through"` + Metric model.Metric `json:"metric"` + + // The hash is not written to the external storage either. We use + // crc32, Castagnoli table. See http://www.evanjones.ca/crc32c.html. + // For old chunks, ChecksumSet will be false. + ChecksumSet bool `json:"-"` + Checksum uint32 `json:"-"` + + // We never use Delta encoding (the zero value), so if this entry is + // missing, we default to DoubleDelta. + Encoding prom_chunk.Encoding `json:"encoding"` + Data prom_chunk.Chunk `json:"-"` + + // This flag is used for very old chunks, where the metadata is read out + // of the index. + metadataInIndex bool +} + +// NewChunk creates a new chunk +func NewChunk(userID string, fp model.Fingerprint, metric model.Metric, c prom_chunk.Chunk, from, through model.Time) Chunk { + return Chunk{ + Fingerprint: fp, + UserID: userID, + From: from, + Through: through, + Metric: metric, + Encoding: c.Encoding(), + Data: c, + } +} + +// parseExternalKey is used to construct a partially-populated chunk from the +// key in DynamoDB. This chunk can then be used to calculate the key needed +// to fetch the Chunk data from Memcache/S3, and then fully populate the chunk +// with decode(). +// +// Pre-checksums, the keys written to DynamoDB looked like +// `::` (aka the ID), and the key for +// memcache and S3 was `/::. 
+// Finger prints and times were written in base-10. +// +// Post-checksums, externals keys become the same across DynamoDB, Memcache +// and S3. Numbers become hex encoded. Keys look like: +// `/:::`. +func parseExternalKey(userID, externalKey string) (Chunk, error) { + if !strings.Contains(externalKey, "/") { + return parseLegacyChunkID(userID, externalKey) + } + chunk, err := parseNewExternalKey(externalKey) + if err != nil { + return Chunk{}, err + } + if chunk.UserID != userID { + return Chunk{}, ErrWrongMetadata + } + return chunk, nil +} + +func parseLegacyChunkID(userID, key string) (Chunk, error) { + parts := strings.Split(key, ":") + if len(parts) != 3 { + return Chunk{}, ErrInvalidChunkID + } + fingerprint, err := strconv.ParseUint(parts[0], 10, 64) + if err != nil { + return Chunk{}, err + } + from, err := strconv.ParseInt(parts[1], 10, 64) + if err != nil { + return Chunk{}, err + } + through, err := strconv.ParseInt(parts[2], 10, 64) + if err != nil { + return Chunk{}, err + } + return Chunk{ + UserID: userID, + Fingerprint: model.Fingerprint(fingerprint), + From: model.Time(from), + Through: model.Time(through), + }, nil +} + +func parseNewExternalKey(key string) (Chunk, error) { + parts := strings.Split(key, "/") + if len(parts) != 2 { + return Chunk{}, ErrInvalidChunkID + } + userID := parts[0] + hexParts := strings.Split(parts[1], ":") + if len(hexParts) != 4 { + return Chunk{}, ErrInvalidChunkID + } + fingerprint, err := strconv.ParseUint(hexParts[0], 16, 64) + if err != nil { + return Chunk{}, err + } + from, err := strconv.ParseInt(hexParts[1], 16, 64) + if err != nil { + return Chunk{}, err + } + through, err := strconv.ParseInt(hexParts[2], 16, 64) + if err != nil { + return Chunk{}, err + } + checksum, err := strconv.ParseUint(hexParts[3], 16, 32) + if err != nil { + return Chunk{}, err + } + return Chunk{ + UserID: userID, + Fingerprint: model.Fingerprint(fingerprint), + From: model.Time(from), + Through: model.Time(through), + Checksum: uint32(checksum), + ChecksumSet: true, + }, nil +} + +// externalKey returns the key you can use to fetch this chunk from external +// storage. For newer chunks, this key includes a checksum. +func (c *Chunk) externalKey() string { + // Some chunks have a checksum stored in dynamodb, some do not. We must + // generate keys appropriately. + if c.ChecksumSet { + // This is the inverse of parseNewExternalKey. + return fmt.Sprintf("%s/%x:%x:%x:%x", c.UserID, uint64(c.Fingerprint), int64(c.From), int64(c.Through), c.Checksum) + } + // This is the inverse of parseLegacyExternalKey, with "/" prepended. + // Legacy chunks had the user ID prefix on s3/memcache, but not in DynamoDB. + // See comment on parseExternalKey. + return fmt.Sprintf("%s/%d:%d:%d", c.UserID, uint64(c.Fingerprint), int64(c.From), int64(c.Through)) +} + +// encode writes the chunk out to a big write buffer, then calculates the checksum. +func (c *Chunk) encode() ([]byte, error) { + var buf bytes.Buffer + + // Write 4 empty bytes first - we will come back and put the len in here. + metadataLenBytes := [4]byte{} + if _, err := buf.Write(metadataLenBytes[:]); err != nil { + return nil, err + } + + // Encode chunk metadata into snappy-compressed buffer + if err := json.NewEncoder(snappy.NewWriter(&buf)).Encode(c); err != nil { + return nil, err + } + + // Write the metadata length back at the start of the buffer. 
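	// The encoded buffer is laid out as:
	//
	//	[4-byte metadata length][snappy-compressed JSON metadata][4-byte data length][marshalled chunk data]
	//
	// buf.Len() below includes the 4-byte prefix itself, so the stored
	// metadata length covers the prefix plus the compressed metadata; the
	// checksum computed at the end of this function is CRC32 (Castagnoli)
	// over the entire buffer.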
+ binary.BigEndian.PutUint32(metadataLenBytes[:], uint32(buf.Len())) + copy(buf.Bytes(), metadataLenBytes[:]) + + // Write the data length + dataLenBytes := [4]byte{} + binary.BigEndian.PutUint32(dataLenBytes[:], uint32(prom_chunk.ChunkLen)) + if _, err := buf.Write(dataLenBytes[:]); err != nil { + return nil, err + } + + // And now the chunk data + if err := c.Data.Marshal(&buf); err != nil { + return nil, err + } + + // Now work out the checksum + output := buf.Bytes() + c.ChecksumSet = true + c.Checksum = crc32.Checksum(output, castagnoliTable) + return output, nil +} + +// decode the chunk from the given buffer, and confirm the chunk is the one we +// expected. +func (c *Chunk) decode(input []byte) error { + // Legacy chunks were written with metadata in the index. + if c.metadataInIndex { + var err error + c.Data, err = prom_chunk.NewForEncoding(prom_chunk.DoubleDelta) + if err != nil { + return err + } + return c.Data.UnmarshalFromBuf(input) + } + + // First, calculate the checksum of the chunk and confirm it matches + // what we expected. + if c.ChecksumSet && c.Checksum != crc32.Checksum(input, castagnoliTable) { + return ErrInvalidChecksum + } + + // Now unmarshal the chunk metadata. + r := bytes.NewReader(input) + var metadataLen uint32 + if err := binary.Read(r, binary.BigEndian, &metadataLen); err != nil { + return err + } + var tempMetadata Chunk + err := json.NewDecoder(snappy.NewReader(&io.LimitedReader{ + N: int64(metadataLen), + R: r, + })).Decode(&tempMetadata) + if err != nil { + return err + } + + // Next, confirm the chunks matches what we expected. Easiest way to do this + // is to compare what the decoded data thinks its external ID would be, but + // we don't write the checksum to s3, so we have to copy the checksum in. + if c.ChecksumSet { + tempMetadata.Checksum, tempMetadata.ChecksumSet = c.Checksum, c.ChecksumSet + if c.externalKey() != tempMetadata.externalKey() { + return ErrWrongMetadata + } + } + *c = tempMetadata + + // Flag indicates if metadata was written to index, and if false implies + // we should read a header of the chunk containing the metadata. Exists + // for backwards compatibility with older chunks, which did not have header. + if c.Encoding == prom_chunk.Delta { + c.Encoding = prom_chunk.DoubleDelta + } + + // Finally, unmarshal the actual chunk data. + c.Data, err = prom_chunk.NewForEncoding(c.Encoding) + if err != nil { + return err + } + + var dataLen uint32 + if err := binary.Read(r, binary.BigEndian, &dataLen); err != nil { + return err + } + + return c.Data.Unmarshal(&io.LimitedReader{ + N: int64(dataLen), + R: r, + }) +} + +// ChunksToMatrix converts a slice of chunks into a model.Matrix. +func ChunksToMatrix(chunks []Chunk) (model.Matrix, error) { + // Group chunks by series, sort and dedupe samples. 
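	// Chunks belonging to the same series share the fingerprint derived from
	// their metric, so one SampleStream is built per fingerprint.
	// util.MergeSamples (defined outside this patch hunk) is assumed to fold
	// each chunk's samples into the stream in timestamp order, dropping
	// duplicates that arise from overlapping chunks.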
+ sampleStreams := map[model.Fingerprint]*model.SampleStream{} + for _, c := range chunks { + fp := c.Metric.Fingerprint() + ss, ok := sampleStreams[fp] + if !ok { + ss = &model.SampleStream{ + Metric: c.Metric, + } + sampleStreams[fp] = ss + } + + samples, err := c.samples() + if err != nil { + return nil, err + } + + ss.Values = util.MergeSamples(ss.Values, samples) + } + + matrix := make(model.Matrix, 0, len(sampleStreams)) + for _, ss := range sampleStreams { + matrix = append(matrix, ss) + } + + return matrix, nil +} + +func (c *Chunk) samples() ([]model.SamplePair, error) { + it := c.Data.NewIterator() + // TODO(juliusv): Pre-allocate this with the right length again once we + // add a method upstream to get the number of samples in a chunk. + var samples []model.SamplePair + for it.Scan() { + samples = append(samples, it.Value()) + } + return samples, nil +} diff --git a/chunk_cache.go b/chunk_cache.go new file mode 100644 index 0000000000000..044535f2aaabc --- /dev/null +++ b/chunk_cache.go @@ -0,0 +1,219 @@ +package chunk + +import ( + "flag" + "sync" + "time" + + "github.com/bradfitz/gomemcache/memcache" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "github.com/weaveworks/common/instrument" + "golang.org/x/net/context" +) + +var ( + memcacheRequests = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "memcache_requests_total", + Help: "Total count of chunks requested from memcache.", + }) + + memcacheHits = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "memcache_hits_total", + Help: "Total count of chunks found in memcache.", + }) + + memcacheCorrupt = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "memcache_corrupt_chunks_total", + Help: "Total count of corrupt chunks found in memcache.", + }) + + memcacheDroppedWriteBack = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "cortex", + Name: "memcache_dropped_write_back", + Help: "Total count of dropped write backs to memcache.", + }) + + memcacheRequestDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "cortex", + Name: "memcache_request_duration_seconds", + Help: "Total time spent in seconds doing memcache requests.", + // Memecache requests are very quick: smallest bucket is 16us, biggest is 1s + Buckets: prometheus.ExponentialBuckets(0.000016, 4, 8), + }, []string{"method", "status_code"}) +) + +func init() { + prometheus.MustRegister(memcacheRequests) + prometheus.MustRegister(memcacheHits) + prometheus.MustRegister(memcacheCorrupt) + prometheus.MustRegister(memcacheRequestDuration) +} + +// Memcache caches things +type Memcache interface { + GetMulti(keys []string) (map[string]*memcache.Item, error) + Set(item *memcache.Item) error +} + +// CacheConfig is config to make a Cache +type CacheConfig struct { + Expiration time.Duration + WriteBackGoroutines int + WriteBackBuffer int + memcacheConfig MemcacheConfig +} + +// RegisterFlags adds the flags required to config this to the given FlagSet +func (cfg *CacheConfig) RegisterFlags(f *flag.FlagSet) { + f.DurationVar(&cfg.Expiration, "memcached.expiration", 0, "How long chunks stay in the memcache.") + f.IntVar(&cfg.WriteBackGoroutines, "memcache.write-back-goroutines", 10, "How many goroutines to use to write back to memcache.") + f.IntVar(&cfg.WriteBackBuffer, "memcache.write-back-buffer", 10000, "How many chunks to buffer for background write back.") + cfg.memcacheConfig.RegisterFlags(f) +} + +// Cache type 
caches chunks +type Cache struct { + cfg CacheConfig + memcache Memcache + + wg sync.WaitGroup + quit chan struct{} + bgWrites chan backgroundWrite +} + +type backgroundWrite struct { + key string + buf []byte +} + +// NewCache makes a new Cache +func NewCache(cfg CacheConfig) *Cache { + var memcache Memcache + if cfg.memcacheConfig.Host != "" { + memcache = NewMemcacheClient(cfg.memcacheConfig) + } + c := &Cache{ + cfg: cfg, + memcache: memcache, + quit: make(chan struct{}), + bgWrites: make(chan backgroundWrite, cfg.WriteBackBuffer), + } + c.wg.Add(cfg.WriteBackGoroutines) + for i := 0; i < cfg.WriteBackGoroutines; i++ { + go c.writeBackLoop() + } + return c +} + +// Stop the background flushing goroutines. +func (c *Cache) Stop() { + close(c.quit) + c.wg.Wait() +} + +func memcacheStatusCode(err error) string { + // See https://godoc.org/github.com/bradfitz/gomemcache/memcache#pkg-variables + switch err { + case nil: + return "200" + case memcache.ErrCacheMiss: + return "404" + case memcache.ErrMalformedKey: + return "400" + default: + return "500" + } +} + +// FetchChunkData gets chunks from the chunk cache. +func (c *Cache) FetchChunkData(ctx context.Context, chunks []Chunk) (found []Chunk, missing []Chunk, err error) { + if c.memcache == nil { + return nil, chunks, nil + } + + memcacheRequests.Add(float64(len(chunks))) + + keys := make([]string, 0, len(chunks)) + for _, chunk := range chunks { + keys = append(keys, chunk.externalKey()) + } + + var items map[string]*memcache.Item + err = instrument.TimeRequestHistogramStatus(ctx, "Memcache.Get", memcacheRequestDuration, memcacheStatusCode, func(_ context.Context) error { + var err error + items, err = c.memcache.GetMulti(keys) + return err + }) + if err != nil { + return nil, chunks, err + } + + for i, externalKey := range keys { + item, ok := items[externalKey] + if !ok { + missing = append(missing, chunks[i]) + continue + } + + if err := chunks[i].decode(item.Value); err != nil { + memcacheCorrupt.Inc() + log.Errorf("Failed to decode chunk from cache: %v", err) + missing = append(missing, chunks[i]) + continue + } + + found = append(found, chunks[i]) + } + + memcacheHits.Add(float64(len(found))) + return found, missing, nil +} + +// StoreChunk serializes and stores a chunk in the chunk cache. 
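A minimal usage sketch (not part of the patch; warmCache is a hypothetical name): callers encode the chunk once and use its external key as the cache key, mirroring TestChunkCache below and the chunk store's putChunk further down.

func warmCache(ctx context.Context, cache *Cache, chunks []Chunk) error {
	for i := range chunks {
		buf, err := chunks[i].encode()
		if err != nil {
			return err
		}
		// The external key doubles as the memcache key.
		if err := cache.StoreChunk(ctx, chunks[i].externalKey(), buf); err != nil {
			return err
		}
	}
	return nil
}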
+func (c *Cache) StoreChunk(ctx context.Context, key string, buf []byte) error { + if c.memcache == nil { + return nil + } + + return instrument.TimeRequestHistogramStatus(ctx, "Memcache.Put", memcacheRequestDuration, memcacheStatusCode, func(_ context.Context) error { + item := memcache.Item{ + Key: key, + Value: buf, + Expiration: int32(c.cfg.Expiration.Seconds()), + } + return c.memcache.Set(&item) + }) +} + +// BackgroundWrite writes chunks for the cache in the background +func (c *Cache) BackgroundWrite(key string, buf []byte) { + bgWrite := backgroundWrite{ + key: key, + buf: buf, + } + select { + case c.bgWrites <- bgWrite: + default: + memcacheDroppedWriteBack.Inc() + } +} + +func (c *Cache) writeBackLoop() { + defer c.wg.Done() + + for { + select { + case bgWrite := <-c.bgWrites: + err := c.StoreChunk(context.Background(), bgWrite.key, bgWrite.buf) + if err != nil { + log.Errorf("Error writing to memcache: %v", err) + } + case <-c.quit: + return + } + } +} diff --git a/chunk_cache_test.go b/chunk_cache_test.go new file mode 100644 index 0000000000000..e66440ddace88 --- /dev/null +++ b/chunk_cache_test.go @@ -0,0 +1,114 @@ +package chunk + +import ( + "math/rand" + "sync" + "testing" + + "github.com/bradfitz/gomemcache/memcache" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/storage/local/chunk" + "github.com/stretchr/testify/require" + "golang.org/x/net/context" +) + +type mockMemcache struct { + sync.RWMutex + contents map[string][]byte +} + +func newMockMemcache() *mockMemcache { + return &mockMemcache{ + contents: map[string][]byte{}, + } +} + +func (m *mockMemcache) GetMulti(keys []string) (map[string]*memcache.Item, error) { + m.RLock() + defer m.RUnlock() + result := map[string]*memcache.Item{} + for _, k := range keys { + if c, ok := m.contents[k]; ok { + result[k] = &memcache.Item{ + Value: c, + } + } + } + return result, nil +} + +func (m *mockMemcache) Set(item *memcache.Item) error { + m.Lock() + defer m.Unlock() + m.contents[item.Key] = item.Value + return nil +} + +func TestChunkCache(t *testing.T) { + c := Cache{ + memcache: newMockMemcache(), + } + + const ( + chunkLen = 13 * 3600 // in seconds + ) + + // put 100 chunks from 0 to 99 + keys := []string{} + chunks := []Chunk{} + for i := 0; i < 100; i++ { + ts := model.TimeFromUnix(int64(i * chunkLen)) + promChunk, _ := chunk.New().Add(model.SamplePair{ + Timestamp: ts, + Value: model.SampleValue(i), + }) + chunk := NewChunk( + userID, + model.Fingerprint(1), + model.Metric{ + model.MetricNameLabel: "foo", + "bar": "baz", + }, + promChunk[0], + ts, + ts.Add(chunkLen), + ) + + buf, err := chunk.encode() + require.NoError(t, err) + + key := chunk.externalKey() + err = c.StoreChunk(context.Background(), key, buf) + require.NoError(t, err) + + keys = append(keys, key) + chunks = append(chunks, chunk) + } + + for i := 0; i < 100; i++ { + index := rand.Intn(len(keys)) + key := keys[index] + + chunk, err := parseExternalKey(userID, key) + require.NoError(t, err) + + found, missing, err := c.FetchChunkData(context.Background(), []Chunk{chunk}) + require.NoError(t, err) + require.Empty(t, missing) + require.Len(t, found, 1) + require.Equal(t, chunks[index], found[0]) + } + + // test getting them all + receivedChunks := []Chunk{} + for i := 0; i < len(keys); i++ { + chunk, err := parseExternalKey(userID, keys[i]) + require.NoError(t, err) + receivedChunks = append(receivedChunks, chunk) + } + found, missing, err := c.FetchChunkData(context.Background(), receivedChunks) + require.NoError(t, err) + 
require.Empty(t, missing) + require.Len(t, found, len(keys)) + require.Equal(t, chunks, receivedChunks) +} diff --git a/chunk_store.go b/chunk_store.go new file mode 100644 index 0000000000000..5248f4ebd3fbe --- /dev/null +++ b/chunk_store.go @@ -0,0 +1,526 @@ +package chunk + +import ( + "encoding/json" + "flag" + "fmt" + "sort" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/common/log" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/promql" + "github.com/prometheus/prometheus/storage/metric" + "golang.org/x/net/context" + + "github.com/weaveworks/common/user" + "github.com/weaveworks/cortex/pkg/util" +) + +var ( + indexEntriesPerChunk = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: "cortex", + Name: "chunk_store_index_entries_per_chunk", + Help: "Number of entries written to storage per chunk.", + Buckets: prometheus.ExponentialBuckets(1, 2, 5), + }) + rowWrites = util.NewHashBucketHistogram(util.HashBucketHistogramOpts{ + HistogramOpts: prometheus.HistogramOpts{ + Namespace: "cortex", + Name: "chunk_store_row_writes_distribution", + Help: "Distribution of writes to individual storage rows", + Buckets: prometheus.DefBuckets, + }, + HashBuckets: 1024, + }) +) + +func init() { + prometheus.MustRegister(indexEntriesPerChunk) + prometheus.MustRegister(rowWrites) +} + +// StoreConfig specifies config for a ChunkStore +type StoreConfig struct { + SchemaConfig + CacheConfig + + // For injecting different schemas in tests. + schemaFactory func(cfg SchemaConfig) Schema +} + +// RegisterFlags adds the flags required to config this to the given FlagSet +func (cfg *StoreConfig) RegisterFlags(f *flag.FlagSet) { + cfg.SchemaConfig.RegisterFlags(f) + cfg.CacheConfig.RegisterFlags(f) +} + +// Store implements Store +type Store struct { + cfg StoreConfig + + storage StorageClient + cache *Cache + schema Schema +} + +// NewStore makes a new ChunkStore +func NewStore(cfg StoreConfig, storage StorageClient) (*Store, error) { + var schema Schema + var err error + if cfg.schemaFactory == nil { + schema, err = newCompositeSchema(cfg.SchemaConfig) + } else { + schema = cfg.schemaFactory(cfg.SchemaConfig) + } + if err != nil { + return nil, err + } + + return &Store{ + cfg: cfg, + storage: storage, + schema: schema, + cache: NewCache(cfg.CacheConfig), + }, nil +} + +// Stop any background goroutines (ie in the cache.) +func (c *Store) Stop() { + c.cache.Stop() +} + +// Put implements ChunkStore +func (c *Store) Put(ctx context.Context, chunks []Chunk) error { + userID, err := user.Extract(ctx) + if err != nil { + return err + } + + // Encode the chunk first - checksum is calculated as a side effect. + bufs := [][]byte{} + keys := []string{} + for i := range chunks { + encoded, err := chunks[i].encode() + if err != nil { + return err + } + bufs = append(bufs, encoded) + keys = append(keys, chunks[i].externalKey()) + } + + err = c.putChunks(ctx, keys, bufs) + if err != nil { + return err + } + + return c.updateIndex(ctx, userID, chunks) +} + +// putChunks writes a collection of chunks to S3 in parallel. +func (c *Store) putChunks(ctx context.Context, keys []string, bufs [][]byte) error { + incomingErrors := make(chan error) + for i := range bufs { + go func(i int) { + incomingErrors <- c.putChunk(ctx, keys[i], bufs[i]) + }(i) + } + + var lastErr error + for range keys { + err := <-incomingErrors + if err != nil { + lastErr = err + } + } + return lastErr +} + +// putChunk puts a chunk into S3. 
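+// A failed write to the chunk cache is only logged; the result of the write
+// to the backing store is what is returned to the caller.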
+func (c *Store) putChunk(ctx context.Context, key string, buf []byte) error { + err := c.storage.PutChunk(ctx, key, buf) + if err != nil { + return err + } + + if err := c.cache.StoreChunk(ctx, key, buf); err != nil { + log.Warnf("Could not store %v in chunk cache: %v", key, err) + } + return nil +} + +func (c *Store) updateIndex(ctx context.Context, userID string, chunks []Chunk) error { + writeReqs, err := c.calculateDynamoWrites(userID, chunks) + if err != nil { + return err + } + + return c.storage.BatchWrite(ctx, writeReqs) +} + +// calculateDynamoWrites creates a set of batched WriteRequests to dynamo for all +// the chunks it is given. +func (c *Store) calculateDynamoWrites(userID string, chunks []Chunk) (WriteBatch, error) { + seenIndexEntries := map[string]struct{}{} + + writeReqs := c.storage.NewWriteBatch() + for _, chunk := range chunks { + metricName, err := util.ExtractMetricNameFromMetric(chunk.Metric) + if err != nil { + return nil, err + } + + entries, err := c.schema.GetWriteEntries(chunk.From, chunk.Through, userID, metricName, chunk.Metric, chunk.externalKey()) + if err != nil { + return nil, err + } + indexEntriesPerChunk.Observe(float64(len(entries))) + + // Remove duplicate entries based on tableName:hashValue:rangeValue + unseenEntries := []IndexEntry{} + for _, entry := range entries { + key := fmt.Sprintf("%s:%s:%x", entry.TableName, entry.HashValue, entry.RangeValue) + if _, ok := seenIndexEntries[key]; !ok { + seenIndexEntries[key] = struct{}{} + unseenEntries = append(unseenEntries, entry) + } + } + + for _, entry := range unseenEntries { + rowWrites.Observe(entry.HashValue, 1) + writeReqs.Add(entry.TableName, entry.HashValue, entry.RangeValue, entry.Value) + } + } + return writeReqs, nil +} + +// Get implements ChunkStore +func (c *Store) Get(ctx context.Context, from, through model.Time, allMatchers ...*metric.LabelMatcher) ([]Chunk, error) { + if through < from { + return nil, fmt.Errorf("invalid query, through < from (%d < %d)", through, from) + } + + filters, matchers := util.SplitFiltersAndMatchers(allMatchers) + + // Fetch chunk descriptors (just ID really) from storage + chunks, err := c.lookupChunksByMatchers(ctx, from, through, matchers) + if err != nil { + return nil, promql.ErrStorage(err) + } + + // Filter out chunks that are not in the selected time range. + filtered := make([]Chunk, 0, len(chunks)) + for _, chunk := range chunks { + if chunk.Through < from || through < chunk.From { + continue + } + filtered = append(filtered, chunk) + } + + // Now fetch the actual chunk data from Memcache / S3 + fromCache, missing, err := c.cache.FetchChunkData(ctx, filtered) + if err != nil { + log.Warnf("Error fetching from cache: %v", err) + } + + fromS3, err := c.fetchChunkData(ctx, missing) + if err != nil { + return nil, promql.ErrStorage(err) + } + + if err = c.writeBackCache(ctx, fromS3); err != nil { + log.Warnf("Could not store chunks in chunk cache: %v", err) + } + + // TODO instead of doing this sort, propagate an index and assign chunks + // into the result based on that index. + allChunks := append(fromCache, fromS3...) 
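+ // Sort the combined cache and store results by external key before applying
+ // the label filters below.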
+ sort.Sort(ByKey(allChunks)) + + // Filter out chunks + filteredChunks := make([]Chunk, 0, len(allChunks)) +outer: + for _, chunk := range allChunks { + for _, filter := range filters { + if !filter.Match(chunk.Metric[filter.Name]) { + continue outer + } + } + + filteredChunks = append(filteredChunks, chunk) + } + + return filteredChunks, nil +} + +func (c *Store) lookupChunksByMatchers(ctx context.Context, from, through model.Time, matchers []*metric.LabelMatcher) ([]Chunk, error) { + metricNameMatcher, matchers, ok := util.ExtractMetricNameMatcherFromMatchers(matchers) + + // Only lookup by metric name if the matcher is of type equal, otherwise we + // have to fetch chunks for all metric names as other metric names could match. + if ok && metricNameMatcher.Type == metric.Equal { + return c.lookupChunksByMetricName(ctx, from, through, matchers, metricNameMatcher.Value) + } + + userID, err := user.Extract(ctx) + if err != nil { + return nil, err + } + + // If there is no metric name, we want return chunks for all metric names + metricNameQueries, err := c.schema.GetReadQueries(from, through, userID) + if err != nil { + return nil, err + } + metricNameEntries, err := c.lookupEntriesByQueries(ctx, metricNameQueries) + if err != nil { + return nil, err + } + + incomingChunkSets := make(chan ByKey) + incomingErrors := make(chan error) + skippedMetricNames := 0 + + for _, metricNameEntry := range metricNameEntries { + metricName, err := parseMetricNameRangeValue(metricNameEntry.RangeValue, metricNameEntry.Value) + if err != nil { + return nil, err + } + + // We are fetching all metric name chunks, however if there is a metricNameMatcher, + // we only want metric names that match + if ok && !metricNameMatcher.Match(metricName) { + skippedMetricNames++ + continue + } + + go func(metricName model.LabelValue) { + chunks, err := c.lookupChunksByMetricName(ctx, from, through, matchers, metricName) + if err != nil { + incomingErrors <- err + } else { + incomingChunkSets <- chunks + } + }(metricName) + } + + var chunkSets []ByKey + var lastErr error + for i := 0; i < (len(metricNameEntries) - skippedMetricNames); i++ { + select { + case incoming := <-incomingChunkSets: + chunkSets = append(chunkSets, incoming) + case err := <-incomingErrors: + lastErr = err + } + } + + return nWayUnion(chunkSets), lastErr +} + +func (c *Store) lookupChunksByMetricName(ctx context.Context, from, through model.Time, matchers []*metric.LabelMatcher, metricName model.LabelValue) ([]Chunk, error) { + userID, err := user.Extract(ctx) + if err != nil { + return nil, err + } + + // Just get chunks for metric if there are no matchers + if len(matchers) == 0 { + queries, err := c.schema.GetReadQueriesForMetric(from, through, userID, metricName) + if err != nil { + return nil, err + } + + entries, err := c.lookupEntriesByQueries(ctx, queries) + if err != nil { + return nil, err + } + + return c.convertIndexEntriesToChunks(ctx, entries, nil) + } + + // Otherwise get chunks which include other matchers + incomingChunkSets := make(chan ByKey) + incomingErrors := make(chan error) + for _, matcher := range matchers { + go func(matcher *metric.LabelMatcher) { + // Lookup IndexQuery's + var queries []IndexQuery + var err error + if matcher.Type != metric.Equal { + queries, err = c.schema.GetReadQueriesForMetricLabel(from, through, userID, metricName, matcher.Name) + } else { + queries, err = c.schema.GetReadQueriesForMetricLabelValue(from, through, userID, metricName, matcher.Name, matcher.Value) + } + if err != nil { + incomingErrors 
<- err + return + } + + // Lookup IndexEntry's + entries, err := c.lookupEntriesByQueries(ctx, queries) + if err != nil { + incomingErrors <- err + return + } + + // Convert IndexEntry's into chunks + chunks, err := c.convertIndexEntriesToChunks(ctx, entries, matcher) + if err != nil { + incomingErrors <- err + } else { + incomingChunkSets <- chunks + } + }(matcher) + } + + // Receive chunkSets from all matchers + var chunkSets []ByKey + var lastErr error + for i := 0; i < len(matchers); i++ { + select { + case incoming := <-incomingChunkSets: + chunkSets = append(chunkSets, incoming) + case err := <-incomingErrors: + lastErr = err + } + } + + // Merge chunkSets in order because we wish to keep label series together consecutively + return nWayIntersect(chunkSets), lastErr +} + +func (c *Store) lookupEntriesByQueries(ctx context.Context, queries []IndexQuery) ([]IndexEntry, error) { + incomingEntries := make(chan []IndexEntry) + incomingErrors := make(chan error) + for _, query := range queries { + go func(query IndexQuery) { + entries, err := c.lookupEntriesByQuery(ctx, query) + if err != nil { + incomingErrors <- err + } else { + incomingEntries <- entries + } + }(query) + } + + // Combine the results into one slice + var entries []IndexEntry + var lastErr error + for i := 0; i < len(queries); i++ { + select { + case incoming := <-incomingEntries: + entries = append(entries, incoming...) + case err := <-incomingErrors: + lastErr = err + } + } + + return entries, lastErr +} + +func (c *Store) lookupEntriesByQuery(ctx context.Context, query IndexQuery) ([]IndexEntry, error) { + var entries []IndexEntry + + if err := c.storage.QueryPages(ctx, query, func(resp ReadBatch, lastPage bool) (shouldContinue bool) { + for i := 0; i < resp.Len(); i++ { + entries = append(entries, IndexEntry{ + TableName: query.TableName, + HashValue: query.HashValue, + RangeValue: resp.RangeValue(i), + Value: resp.Value(i), + }) + } + return !lastPage + }); err != nil { + log.Errorf("Error querying storage: %v", err) + return nil, err + } + + return entries, nil +} + +func (c *Store) convertIndexEntriesToChunks(ctx context.Context, entries []IndexEntry, matcher *metric.LabelMatcher) (ByKey, error) { + userID, err := user.Extract(ctx) + if err != nil { + return nil, err + } + + var chunkSet ByKey + + for _, entry := range entries { + chunkKey, labelValue, metadataInIndex, err := parseChunkTimeRangeValue(entry.RangeValue, entry.Value) + if err != nil { + return nil, err + } + + chunk, err := parseExternalKey(userID, chunkKey) + if err != nil { + return nil, err + } + + // This can be removed in Dev 2017, 13 months after the last chunks + // was written with metadata in the index. 
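+ // Older index entries carried the chunk metadata as JSON in the value
+ // column; decode it here so those chunks remain readable.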
+ if metadataInIndex && entry.Value != nil { + if err := json.Unmarshal(entry.Value, &chunk); err != nil { + return nil, err + } + chunk.metadataInIndex = true + } + + if matcher != nil && !matcher.Match(labelValue) { + log.Debug("Dropping chunk for non-matching metric ", chunk.Metric) + continue + } + chunkSet = append(chunkSet, chunk) + } + + // Return chunks sorted and deduped because they will be merged with other sets + sort.Sort(chunkSet) + return unique(chunkSet), nil +} + +func (c *Store) fetchChunkData(ctx context.Context, chunkSet []Chunk) ([]Chunk, error) { + incomingChunks := make(chan Chunk) + incomingErrors := make(chan error) + for _, chunk := range chunkSet { + go func(chunk Chunk) { + buf, err := c.storage.GetChunk(ctx, chunk.externalKey()) + if err != nil { + incomingErrors <- err + return + } + if err := chunk.decode(buf); err != nil { + incomingErrors <- err + return + } + incomingChunks <- chunk + }(chunk) + } + + chunks := []Chunk{} + errors := []error{} + for i := 0; i < len(chunkSet); i++ { + select { + case chunk := <-incomingChunks: + chunks = append(chunks, chunk) + case err := <-incomingErrors: + errors = append(errors, err) + } + } + if len(errors) > 0 { + return nil, errors[0] + } + return chunks, nil +} + +func (c *Store) writeBackCache(_ context.Context, chunks []Chunk) error { + for i := range chunks { + encoded, err := chunks[i].encode() + if err != nil { + return err + } + c.cache.BackgroundWrite(chunks[i].externalKey(), encoded) + } + return nil +} diff --git a/chunk_store_test.go b/chunk_store_test.go new file mode 100644 index 0000000000000..53225fb2d19db --- /dev/null +++ b/chunk_store_test.go @@ -0,0 +1,424 @@ +package chunk + +import ( + "fmt" + "math/rand" + "reflect" + "testing" + "time" + + "github.com/prometheus/common/log" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/storage/local/chunk" + "github.com/prometheus/prometheus/storage/metric" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/net/context" + + "github.com/weaveworks/common/test" + "github.com/weaveworks/common/user" +) + +// newTestStore creates a new Store for testing. 
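+// It is backed by MockStorage, with the required tables created up front.
+// Tests can inject a schema via StoreConfig, e.g.:
+//
+//	store := newTestChunkStore(t, StoreConfig{schemaFactory: v7Schema})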
+func newTestChunkStore(t *testing.T, cfg StoreConfig) *Store { + storage := NewMockStorage() + tableManager, err := NewDynamoTableManager(TableManagerConfig{}, storage) + require.NoError(t, err) + err = tableManager.syncTables(context.Background()) + require.NoError(t, err) + store, err := NewStore(cfg, storage) + require.NoError(t, err) + return store +} + +func TestChunkStore(t *testing.T) { + ctx := user.Inject(context.Background(), userID) + now := model.Now() + chunk1 := dummyChunkFor(model.Metric{ + model.MetricNameLabel: "foo", + "bar": "baz", + "toms": "code", + "flip": "flop", + }) + chunk2 := dummyChunkFor(model.Metric{ + model.MetricNameLabel: "foo", + "bar": "beep", + "toms": "code", + }) + + schemas := []struct { + name string + fn func(cfg SchemaConfig) Schema + }{ + {"v1 schema", v1Schema}, + {"v2 schema", v2Schema}, + {"v3 schema", v3Schema}, + {"v4 schema", v4Schema}, + {"v5 schema", v5Schema}, + {"v6 schema", v6Schema}, + {"v7 schema", v7Schema}, + } + + nameMatcher := mustNewLabelMatcher(metric.Equal, model.MetricNameLabel, "foo") + + for _, tc := range []struct { + query string + expect []Chunk + matchers []*metric.LabelMatcher + }{ + { + `foo`, + []Chunk{chunk1, chunk2}, + []*metric.LabelMatcher{nameMatcher}, + }, + { + `foo{flip=""}`, + []Chunk{chunk2}, + []*metric.LabelMatcher{nameMatcher, mustNewLabelMatcher(metric.Equal, "flip", "")}, + }, + { + `foo{bar="baz"}`, + []Chunk{chunk1}, + []*metric.LabelMatcher{nameMatcher, mustNewLabelMatcher(metric.Equal, "bar", "baz")}, + }, + { + `foo{bar="beep"}`, + []Chunk{chunk2}, + []*metric.LabelMatcher{nameMatcher, mustNewLabelMatcher(metric.Equal, "bar", "beep")}, + }, + { + `foo{toms="code"}`, + []Chunk{chunk1, chunk2}, + []*metric.LabelMatcher{nameMatcher, mustNewLabelMatcher(metric.Equal, "toms", "code")}, + }, + { + `foo{bar!="baz"}`, + []Chunk{chunk2}, + []*metric.LabelMatcher{nameMatcher, mustNewLabelMatcher(metric.NotEqual, "bar", "baz")}, + }, + { + `foo{bar=~"beep|baz"}`, + []Chunk{chunk1, chunk2}, + []*metric.LabelMatcher{nameMatcher, mustNewLabelMatcher(metric.RegexMatch, "bar", "beep|baz")}, + }, + { + `foo{toms="code", bar=~"beep|baz"}`, + []Chunk{chunk1, chunk2}, + []*metric.LabelMatcher{nameMatcher, mustNewLabelMatcher(metric.Equal, "toms", "code"), mustNewLabelMatcher(metric.RegexMatch, "bar", "beep|baz")}, + }, + { + `foo{toms="code", bar="baz"}`, + []Chunk{chunk1}, []*metric.LabelMatcher{nameMatcher, mustNewLabelMatcher(metric.Equal, "toms", "code"), mustNewLabelMatcher(metric.Equal, "bar", "baz")}, + }, + } { + for _, schema := range schemas { + t.Run(fmt.Sprintf("%s / %s", tc.query, schema.name), func(t *testing.T) { + log.Infoln("========= Running query", tc.query, "with schema", schema.name) + store := newTestChunkStore(t, StoreConfig{ + schemaFactory: schema.fn, + }) + + if err := store.Put(ctx, []Chunk{chunk1, chunk2}); err != nil { + t.Fatal(err) + } + + chunks, err := store.Get(ctx, now.Add(-time.Hour), now, tc.matchers...) 
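+ // The query should succeed and, once checksums are zeroed below, match the
+ // expected chunks exactly.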
+ require.NoError(t, err) + + // Zero out the checksums, as the inputs above didn't have the checksums calculated + for i := range chunks { + chunks[i].Checksum = 0 + chunks[i].ChecksumSet = false + } + + if !reflect.DeepEqual(tc.expect, chunks) { + t.Fatalf("%s: wrong chunks - %s", tc.query, test.Diff(tc.expect, chunks)) + } + }) + } + } +} + +// TestChunkStoreMetricNames tests no metric name queries supported from v7Schema +func TestChunkStoreMetricNames(t *testing.T) { + ctx := user.Inject(context.Background(), userID) + now := model.Now() + + foo1Chunk1 := dummyChunkFor(model.Metric{ + model.MetricNameLabel: "foo1", + "bar": "baz", + "toms": "code", + "flip": "flop", + }) + foo1Chunk2 := dummyChunkFor(model.Metric{ + model.MetricNameLabel: "foo1", + "bar": "beep", + "toms": "code", + }) + foo2Chunk := dummyChunkFor(model.Metric{ + model.MetricNameLabel: "foo2", + "bar": "beep", + "toms": "code", + }) + foo3Chunk := dummyChunkFor(model.Metric{ + model.MetricNameLabel: "foo3", + "bar": "beep", + "toms": "code", + }) + + schemas := []struct { + name string + fn func(cfg SchemaConfig) Schema + }{ + {"v7 schema", v7Schema}, + } + + for _, tc := range []struct { + query string + expect []Chunk + matchers []*metric.LabelMatcher + }{ + { + `foo1`, + []Chunk{foo1Chunk1, foo1Chunk2}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.Equal, model.MetricNameLabel, "foo1")}, + }, + { + `foo2`, + []Chunk{foo2Chunk}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.Equal, model.MetricNameLabel, "foo2")}, + }, + { + `foo3`, + []Chunk{foo3Chunk}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.Equal, model.MetricNameLabel, "foo3")}, + }, + + // When name matcher is used without Equal, start matching all metric names + // however still filter out metric names which do not match query + { + `{__name__!="foo1"}`, + []Chunk{foo3Chunk, foo2Chunk}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.NotEqual, model.MetricNameLabel, "foo1")}, + }, + { + `{__name__=~"foo1|foo2"}`, + []Chunk{foo1Chunk1, foo2Chunk, foo1Chunk2}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.RegexMatch, model.MetricNameLabel, "foo1|foo2")}, + }, + + // No metric names + { + `{bar="baz"}`, + []Chunk{foo1Chunk1}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.Equal, "bar", "baz")}, + }, + { + `{bar="beep"}`, + []Chunk{foo3Chunk, foo2Chunk, foo1Chunk2}, // doesn't match foo1 chunk1 + []*metric.LabelMatcher{mustNewLabelMatcher(metric.Equal, "bar", "beep")}, + }, + { + `{flip=""}`, + []Chunk{foo3Chunk, foo2Chunk, foo1Chunk2}, // doesn't match foo1 chunk1 as it has a flip value + []*metric.LabelMatcher{mustNewLabelMatcher(metric.Equal, "flip", "")}, + }, + { + `{bar!="beep"}`, + []Chunk{foo1Chunk1}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.NotEqual, "bar", "beep")}, + }, + { + `{bar=~"beep|baz"}`, + []Chunk{foo3Chunk, foo1Chunk1, foo2Chunk, foo1Chunk2}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.RegexMatch, "bar", "beep|baz")}, + }, + { + `{toms="code", bar=~"beep|baz"}`, + []Chunk{foo3Chunk, foo1Chunk1, foo2Chunk, foo1Chunk2}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.Equal, "toms", "code"), mustNewLabelMatcher(metric.RegexMatch, "bar", "beep|baz")}, + }, + { + `{toms="code", bar="baz"}`, + []Chunk{foo1Chunk1}, + []*metric.LabelMatcher{mustNewLabelMatcher(metric.Equal, "toms", "code"), mustNewLabelMatcher(metric.Equal, "bar", "baz")}, + }, + } { + for _, schema := range schemas { + t.Run(fmt.Sprintf("%s / %s", tc.query, schema.name), func(t *testing.T) { + log.Infoln("========= Running 
query", tc.query, "with schema", schema.name) + store := newTestChunkStore(t, StoreConfig{ + schemaFactory: schema.fn, + }) + + if err := store.Put(ctx, []Chunk{foo1Chunk1, foo1Chunk2, foo2Chunk, foo3Chunk}); err != nil { + t.Fatal(err) + } + + chunks, err := store.Get(ctx, now.Add(-time.Hour), now, tc.matchers...) + require.NoError(t, err) + + // Zero out the checksums, as the inputs above didn't have the checksums calculated + for i := range chunks { + chunks[i].Checksum = 0 + chunks[i].ChecksumSet = false + } + + if !reflect.DeepEqual(tc.expect, chunks) { + t.Fatalf("%s: wrong chunks - %s", tc.query, test.Diff(tc.expect, chunks)) + } + }) + } + } +} + +func mustNewLabelMatcher(matchType metric.MatchType, name model.LabelName, value model.LabelValue) *metric.LabelMatcher { + matcher, err := metric.NewLabelMatcher(matchType, name, value) + if err != nil { + panic(err) + } + return matcher +} + +func TestChunkStoreRandom(t *testing.T) { + ctx := user.Inject(context.Background(), userID) + schemas := []struct { + name string + fn func(cfg SchemaConfig) Schema + store *Store + }{ + {name: "v1 schema", fn: v1Schema}, + {name: "v2 schema", fn: v2Schema}, + {name: "v3 schema", fn: v3Schema}, + {name: "v4 schema", fn: v4Schema}, + {name: "v5 schema", fn: v5Schema}, + {name: "v6 schema", fn: v6Schema}, + {name: "v7 schema", fn: v7Schema}, + } + + for i := range schemas { + schemas[i].store = newTestChunkStore(t, StoreConfig{ + schemaFactory: schemas[i].fn, + }) + } + + // put 100 chunks from 0 to 99 + const chunkLen = 13 * 3600 // in seconds + for i := 0; i < 100; i++ { + ts := model.TimeFromUnix(int64(i * chunkLen)) + chunks, _ := chunk.New().Add(model.SamplePair{ + Timestamp: ts, + Value: model.SampleValue(float64(i)), + }) + chunk := NewChunk( + userID, + model.Fingerprint(1), + model.Metric{ + model.MetricNameLabel: "foo", + "bar": "baz", + }, + chunks[0], + ts, + ts.Add(chunkLen*time.Second), + ) + for _, s := range schemas { + err := s.store.Put(ctx, []Chunk{chunk}) + require.NoError(t, err) + } + } + + // pick two random numbers and do a query + for i := 0; i < 100; i++ { + start := rand.Int63n(100 * chunkLen) + end := start + rand.Int63n((100*chunkLen)-start) + assert.True(t, start < end) + + startTime := model.TimeFromUnix(start) + endTime := model.TimeFromUnix(end) + + for _, s := range schemas { + chunks, err := s.store.Get(ctx, startTime, endTime, + mustNewLabelMatcher(metric.Equal, model.MetricNameLabel, "foo"), + mustNewLabelMatcher(metric.Equal, "bar", "baz"), + ) + require.NoError(t, err) + + // We need to check that each chunk is in the time range + for _, chunk := range chunks { + assert.False(t, chunk.From.After(endTime)) + assert.False(t, chunk.Through.Before(startTime)) + samples, err := chunk.samples() + assert.NoError(t, err) + assert.Equal(t, 1, len(samples)) + // TODO verify chunk contents + } + + // And check we got all the chunks we want + numChunks := (end / chunkLen) - (start / chunkLen) + 1 + assert.Equal(t, int(numChunks), len(chunks), s.name) + } + } +} + +func TestChunkStoreLeastRead(t *testing.T) { + // Test we don't read too much from the index + ctx := user.Inject(context.Background(), userID) + store := newTestChunkStore(t, StoreConfig{ + schemaFactory: v6Schema, + }) + + // Put 24 chunks 1hr chunks in the store + const chunkLen = 60 // in seconds + for i := 0; i < 24; i++ { + ts := model.TimeFromUnix(int64(i * chunkLen)) + chunks, _ := chunk.New().Add(model.SamplePair{ + Timestamp: ts, + Value: model.SampleValue(float64(i)), + }) + chunk := NewChunk( + 
userID, + model.Fingerprint(1), + model.Metric{ + model.MetricNameLabel: "foo", + "bar": "baz", + }, + chunks[0], + ts, + ts.Add(chunkLen*time.Second), + ) + log.Infof("Loop %d", i) + err := store.Put(ctx, []Chunk{chunk}) + require.NoError(t, err) + } + + // pick a random numbers and do a query to end of row + for i := 1; i < 24; i++ { + start := int64(i * chunkLen) + end := int64(24 * chunkLen) + assert.True(t, start <= end) + + startTime := model.TimeFromUnix(start) + endTime := model.TimeFromUnix(end) + + chunks, err := store.Get(ctx, startTime, endTime, + mustNewLabelMatcher(metric.Equal, model.MetricNameLabel, "foo"), + mustNewLabelMatcher(metric.Equal, "bar", "baz"), + ) + if err != nil { + t.Fatal(t, err) + } + + // We need to check that each chunk is in the time range + for _, chunk := range chunks { + assert.False(t, chunk.From.After(endTime)) + assert.False(t, chunk.Through.Before(startTime)) + samples, err := chunk.samples() + assert.NoError(t, err) + assert.Equal(t, 1, len(samples)) + } + + // And check we got all the chunks we want + numChunks := 24 - (start / chunkLen) + 1 + assert.Equal(t, int(numChunks), len(chunks)) + } +} diff --git a/chunk_test.go b/chunk_test.go new file mode 100644 index 0000000000000..5b71b68579548 --- /dev/null +++ b/chunk_test.go @@ -0,0 +1,123 @@ +package chunk + +import ( + "fmt" + "testing" + "time" + + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/storage/local/chunk" + "github.com/stretchr/testify/require" +) + +const userID = "userID" + +func dummyChunk() Chunk { + return dummyChunkFor(model.Metric{ + model.MetricNameLabel: "foo", + "bar": "baz", + "toms": "code", + }) +} + +func dummyChunkFor(metric model.Metric) Chunk { + now := model.Now() + cs, _ := chunk.New().Add(model.SamplePair{Timestamp: now, Value: 0}) + chunk := NewChunk( + userID, + metric.Fingerprint(), + metric, + cs[0], + now.Add(-time.Hour), + now, + ) + return chunk +} + +func TestChunkCodec(t *testing.T) { + for i, c := range []struct { + chunk Chunk + err error + f func(*Chunk, []byte) + }{ + // Basic round trip + {chunk: dummyChunk()}, + + // Checksum should fail + { + chunk: dummyChunk(), + err: ErrInvalidChecksum, + f: func(_ *Chunk, buf []byte) { buf[4]++ }, + }, + + // Checksum should fail + { + chunk: dummyChunk(), + err: ErrInvalidChecksum, + f: func(c *Chunk, _ []byte) { c.Checksum = 123 }, + }, + + // Metadata test should fail + { + chunk: dummyChunk(), + err: ErrWrongMetadata, + f: func(c *Chunk, _ []byte) { c.Fingerprint++ }, + }, + + // Metadata test should fail + { + chunk: dummyChunk(), + err: ErrWrongMetadata, + f: func(c *Chunk, _ []byte) { c.UserID = "foo" }, + }, + } { + t.Run(fmt.Sprintf("[%d]", i), func(t *testing.T) { + buf, err := c.chunk.encode() + require.NoError(t, err) + + have, err := parseExternalKey(userID, c.chunk.externalKey()) + require.NoError(t, err) + + if c.f != nil { + c.f(&have, buf) + } + + err = have.decode(buf) + require.Equal(t, err, c.err) + + if c.err == nil { + require.Equal(t, have, c.chunk) + } + }) + } +} + +func TestParseExternalKey(t *testing.T) { + for _, c := range []struct { + key string + chunk Chunk + err error + }{ + {key: "2:1484661279394:1484664879394", chunk: Chunk{ + UserID: userID, + Fingerprint: model.Fingerprint(2), + From: model.Time(1484661279394), + Through: model.Time(1484664879394), + }}, + + {key: userID + "/2:270d8f00:270d8f00:f84c5745", chunk: Chunk{ + UserID: userID, + Fingerprint: model.Fingerprint(2), + From: model.Time(655200000), + Through: model.Time(655200000), + 
ChecksumSet: true, + Checksum: 4165752645, + }}, + + {key: "invalidUserID/2:270d8f00:270d8f00:f84c5745", chunk: Chunk{}, err: ErrWrongMetadata}, + } { + chunk, err := parseExternalKey(userID, c.key) + require.Equal(t, c.err, err) + require.Equal(t, c.chunk, chunk) + } +} diff --git a/inmemory_storage_client.go b/inmemory_storage_client.go new file mode 100644 index 0000000000000..e1458f7c3228d --- /dev/null +++ b/inmemory_storage_client.go @@ -0,0 +1,280 @@ +package chunk + +import ( + "bytes" + "fmt" + "sort" + "sync" + + "github.com/aws/aws-sdk-go/service/dynamodb" + "github.com/prometheus/common/log" + "golang.org/x/net/context" +) + +// MockStorage is a fake in-memory StorageClient. +type MockStorage struct { + mtx sync.RWMutex + tables map[string]*mockTable + objects map[string][]byte +} + +type mockTable struct { + items map[string][]mockItem + write, read int64 +} + +type mockItem struct { + rangeValue []byte + value []byte +} + +// NewMockStorage creates a new MockStorage. +func NewMockStorage() *MockStorage { + return &MockStorage{ + tables: map[string]*mockTable{}, + objects: map[string][]byte{}, + } +} + +// ListTables implements StorageClient. +func (m *MockStorage) ListTables(_ context.Context) ([]string, error) { + m.mtx.RLock() + defer m.mtx.RUnlock() + + var tableNames []string + for tableName := range m.tables { + func(tableName string) { + tableNames = append(tableNames, tableName) + }(tableName) + } + return tableNames, nil +} + +// CreateTable implements StorageClient. +func (m *MockStorage) CreateTable(_ context.Context, name string, read, write int64) error { + m.mtx.Lock() + defer m.mtx.Unlock() + + if _, ok := m.tables[name]; ok { + return fmt.Errorf("table already exists") + } + + m.tables[name] = &mockTable{ + items: map[string][]mockItem{}, + write: write, + read: read, + } + + return nil +} + +// DescribeTable implements StorageClient. +func (m *MockStorage) DescribeTable(_ context.Context, name string) (readCapacity, writeCapacity int64, status string, err error) { + m.mtx.RLock() + defer m.mtx.RUnlock() + + table, ok := m.tables[name] + if !ok { + return 0, 0, "", fmt.Errorf("not found") + } + + return table.read, table.write, dynamodb.TableStatusActive, nil +} + +// UpdateTable implements StorageClient. +func (m *MockStorage) UpdateTable(_ context.Context, name string, readCapacity, writeCapacity int64) error { + m.mtx.Lock() + defer m.mtx.Unlock() + + table, ok := m.tables[name] + if !ok { + return fmt.Errorf("not found") + } + + table.read = readCapacity + table.write = writeCapacity + + return nil +} + +// NewWriteBatch implements StorageClient. +func (m *MockStorage) NewWriteBatch() WriteBatch { + return &mockWriteBatch{} +} + +// BatchWrite implements StorageClient. 
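+// Items for each hash key are kept sorted by range value; duplicate writes
+// within a batch, or rewrites of anything other than a metric name entry,
+// return an error.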
+func (m *MockStorage) BatchWrite(_ context.Context, batch WriteBatch) error { + m.mtx.Lock() + defer m.mtx.Unlock() + + mockBatch := *batch.(*mockWriteBatch) + seenWrites := map[string]bool{} + + for _, req := range mockBatch { + table, ok := m.tables[req.tableName] + if !ok { + return fmt.Errorf("table not found") + } + + // Check for duplicate writes by RangeKey in same batch + key := fmt.Sprintf("%s:%s:%x", req.tableName, req.hashValue, req.rangeValue) + if _, ok := seenWrites[key]; ok { + return fmt.Errorf("Dupe write in batch") + } + seenWrites[key] = true + + log.Debugf("Write %s/%x", req.hashValue, req.rangeValue) + + items := table.items[req.hashValue] + + // insert in order + i := sort.Search(len(items), func(i int) bool { + return bytes.Compare(items[i].rangeValue, req.rangeValue) >= 0 + }) + if i >= len(items) || !bytes.Equal(items[i].rangeValue, req.rangeValue) { + items = append(items, mockItem{}) + copy(items[i+1:], items[i:]) + } else { + // Return error if duplicate write and not metric name entry + itemComponents := decodeRangeKey(items[i].rangeValue) + if !bytes.Equal(itemComponents[3], metricNameRangeKeyV1) { + return fmt.Errorf("Dupe write") + } + } + items[i] = mockItem{ + rangeValue: req.rangeValue, + value: req.value, + } + + table.items[req.hashValue] = items + } + return nil +} + +// QueryPages implements StorageClient. +func (m *MockStorage) QueryPages(_ context.Context, query IndexQuery, callback func(result ReadBatch, lastPage bool) (shouldContinue bool)) error { + m.mtx.RLock() + defer m.mtx.RUnlock() + + table, ok := m.tables[query.TableName] + if !ok { + return fmt.Errorf("table not found") + } + + items, ok := table.items[query.HashValue] + if !ok { + return nil + } + + if query.RangeValuePrefix != nil { + log.Debugf("Lookup prefix %s/%x (%d)", query.HashValue, query.RangeValuePrefix, len(items)) + + // the smallest index i in [0, n) at which f(i) is true + i := sort.Search(len(items), func(i int) bool { + if bytes.Compare(items[i].rangeValue, query.RangeValuePrefix) > 0 { + return true + } + return bytes.HasPrefix(items[i].rangeValue, query.RangeValuePrefix) + }) + j := sort.Search(len(items)-i, func(j int) bool { + if bytes.Compare(items[i+j].rangeValue, query.RangeValuePrefix) < 0 { + return false + } + return !bytes.HasPrefix(items[i+j].rangeValue, query.RangeValuePrefix) + }) + + log.Debugf(" found range [%d:%d)", i, i+j) + if i > len(items) || j == 0 { + return nil + } + items = items[i : i+j] + + } else if query.RangeValueStart != nil { + log.Debugf("Lookup range %s/%x -> ... (%d)", query.HashValue, query.RangeValueStart, len(items)) + + // the smallest index i in [0, n) at which f(i) is true + i := sort.Search(len(items), func(i int) bool { + return bytes.Compare(items[i].rangeValue, query.RangeValueStart) >= 0 + }) + + log.Debugf(" found range [%d)", i) + if i > len(items) { + return nil + } + items = items[i:] + + } else { + log.Debugf("Lookup %s/* (%d)", query.HashValue, len(items)) + } + + // Filters + if query.ValueEqual != nil { + log.Debugf("Filter Value EQ = %s", query.ValueEqual) + + filtered := make([]mockItem, 0) + for _, v := range items { + if bytes.Equal(v.value, query.ValueEqual) { + filtered = append(filtered, v) + } + } + items = filtered + } + + result := mockReadBatch{} + for _, item := range items { + result = append(result, item) + } + + callback(result, true) + return nil +} + +// PutChunk implements S3Client. 
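+// Chunks are kept in a plain in-memory map keyed by the external chunk key.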
+func (m *MockStorage) PutChunk(_ context.Context, key string, buf []byte) error { + m.mtx.Lock() + defer m.mtx.Unlock() + + m.objects[key] = buf + return nil +} + +// GetChunk implements S3Client. +func (m *MockStorage) GetChunk(_ context.Context, key string) ([]byte, error) { + m.mtx.RLock() + defer m.mtx.RUnlock() + + buf, ok := m.objects[key] + if !ok { + return nil, fmt.Errorf("%v not found", key) + } + + return buf, nil +} + +type mockWriteBatch []struct { + tableName, hashValue string + rangeValue []byte + value []byte +} + +func (b *mockWriteBatch) Add(tableName, hashValue string, rangeValue []byte, value []byte) { + *b = append(*b, struct { + tableName, hashValue string + rangeValue []byte + value []byte + }{tableName, hashValue, rangeValue, value}) +} + +type mockReadBatch []mockItem + +func (b mockReadBatch) Len() int { + return len(b) +} + +func (b mockReadBatch) RangeValue(i int) []byte { + return b[i].rangeValue +} + +func (b mockReadBatch) Value(i int) []byte { + return b[i].value +} diff --git a/memcache_client.go b/memcache_client.go new file mode 100644 index 0000000000000..1b5b976d4a19f --- /dev/null +++ b/memcache_client.go @@ -0,0 +1,106 @@ +package chunk + +import ( + "flag" + "fmt" + "net" + "sort" + "sync" + "time" + + "github.com/bradfitz/gomemcache/memcache" + "github.com/prometheus/common/log" +) + +// MemcacheClient is a memcache client that gets its server list from SRV +// records, and periodically updates that ServerList. +type MemcacheClient struct { + *memcache.Client + serverList *memcache.ServerList + hostname string + service string + + quit chan struct{} + wait sync.WaitGroup +} + +// MemcacheConfig defines how a MemcacheClient should be constructed. +type MemcacheConfig struct { + Host string + Service string + Timeout time.Duration + UpdateInterval time.Duration +} + +// RegisterFlags adds the flags required to config this to the given FlagSet +func (cfg *MemcacheConfig) RegisterFlags(f *flag.FlagSet) { + f.StringVar(&cfg.Host, "memcached.hostname", "", "Hostname for memcached service to use when caching chunks. If empty, no memcached will be used.") + f.StringVar(&cfg.Service, "memcached.service", "memcached", "SRV service used to discover memcache servers.") + f.DurationVar(&cfg.Timeout, "memcached.timeout", 100*time.Millisecond, "Maximum time to wait before giving up on memcached requests.") + f.DurationVar(&cfg.UpdateInterval, "memcached.update-interval", 1*time.Minute, "Period with which to poll DNS for memcache servers.") +} + +// NewMemcacheClient creates a new MemcacheClient that gets its server list +// from SRV and updates the server list on a regular basis. +func NewMemcacheClient(cfg MemcacheConfig) *MemcacheClient { + var servers memcache.ServerList + client := memcache.NewFromSelector(&servers) + client.Timeout = cfg.Timeout + + newClient := &MemcacheClient{ + Client: client, + serverList: &servers, + hostname: cfg.Host, + service: cfg.Service, + quit: make(chan struct{}), + } + err := newClient.updateMemcacheServers() + if err != nil { + log.Errorf("Error setting memcache servers to '%v': %v", cfg.Host, err) + } + + newClient.wait.Add(1) + go newClient.updateLoop(cfg.UpdateInterval) + return newClient +} + +// Stop the memcache client. 
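+// This closes the quit channel and waits for the background server-list
+// refresher to finish.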
+func (c *MemcacheClient) Stop() { + close(c.quit) + c.wait.Wait() +} + +func (c *MemcacheClient) updateLoop(updateInterval time.Duration) error { + defer c.wait.Done() + ticker := time.NewTicker(updateInterval) + var err error + for { + select { + case <-ticker.C: + err = c.updateMemcacheServers() + if err != nil { + log.Warnf("Error updating memcache servers: %v", err) + } + case <-c.quit: + ticker.Stop() + } + } +} + +// updateMemcacheServers sets a memcache server list from SRV records. SRV +// priority & weight are ignored. +func (c *MemcacheClient) updateMemcacheServers() error { + _, addrs, err := net.LookupSRV(c.service, "tcp", c.hostname) + if err != nil { + return err + } + var servers []string + for _, srv := range addrs { + servers = append(servers, fmt.Sprintf("%s:%d", srv.Target, srv.Port)) + } + // ServerList deterministically maps keys to _index_ of the server list. + // Since DNS returns records in different order each time, we sort to + // guarantee best possible match between nodes. + sort.Strings(servers) + return c.serverList.SetServers(servers...) +} diff --git a/schema.go b/schema.go new file mode 100644 index 0000000000000..298c4bedb39f6 --- /dev/null +++ b/schema.go @@ -0,0 +1,538 @@ +package chunk + +import ( + "crypto/sha1" + "errors" + "fmt" + "strings" + + "github.com/prometheus/common/model" + "github.com/weaveworks/cortex/pkg/util" +) + +var ( + chunkTimeRangeKeyV1 = []byte{'1'} + chunkTimeRangeKeyV2 = []byte{'2'} + chunkTimeRangeKeyV3 = []byte{'3'} + chunkTimeRangeKeyV4 = []byte{'4'} + chunkTimeRangeKeyV5 = []byte{'5'} + metricNameRangeKeyV1 = []byte{'6'} +) + +// Errors +var ( + ErrNoMetricNameNotSupported = errors.New("metric name required for pre-v7 schemas") +) + +// Schema interface defines methods to calculate the hash and range keys needed +// to write or read chunks from the external index. +type Schema interface { + // When doing a write, use this method to return the list of entries you should write to. + GetWriteEntries(from, through model.Time, userID string, metricName model.LabelValue, labels model.Metric, chunkID string) ([]IndexEntry, error) + + // When doing a read, use these methods to return the list of entries you should query + GetReadQueries(from, through model.Time, userID string) ([]IndexQuery, error) + GetReadQueriesForMetric(from, through model.Time, userID string, metricName model.LabelValue) ([]IndexQuery, error) + GetReadQueriesForMetricLabel(from, through model.Time, userID string, metricName model.LabelValue, labelName model.LabelName) ([]IndexQuery, error) + GetReadQueriesForMetricLabelValue(from, through model.Time, userID string, metricName model.LabelValue, labelName model.LabelName, labelValue model.LabelValue) ([]IndexQuery, error) +} + +// IndexQuery describes a query for entries +type IndexQuery struct { + TableName string + HashValue string + + // One of RangeValuePrefix or RangeValueStart might be set: + // - If RangeValuePrefix is not nil, must read all keys with that prefix. + // - If RangeValueStart is not nil, must read all keys from there onwards. + // - If neither is set, must read all keys for that row. + RangeValuePrefix []byte + RangeValueStart []byte + + // Filters for querying + ValueEqual []byte +} + +// IndexEntry describes an entry in the chunk index +type IndexEntry struct { + TableName string + HashValue string + + // For writes, RangeValue will always be set. + RangeValue []byte + + // New for v6 schema, label value is not written as part of the range key. 
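+ // Instead the label value is carried in this field.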
+ Value []byte +} + +// v1Schema was: +// - hash key: :: +// - range key: