
Remove CacheAllMetadata caching policy #1110

Merged · 13 commits · Oct 22, 2018
12 changes: 0 additions & 12 deletions src/dbnode/storage/block/block_mock.go

Some generated files are not rendered by default.

4 changes: 0 additions & 4 deletions src/dbnode/storage/block/types.go
@@ -168,10 +168,6 @@ type DatabaseBlock interface {
// merged during Stream().
HasMergeTarget() bool

// IsRetrieved returns whether the block is already retrieved. Only
Collaborator:

Hm, shouldn't this be required for some cache policies (e.g. LRU/RecentlyRead)?

Collaborator (author):

I trusted the comment! I believe the other caching policies use IsCachedBlock(), and "retrieved" means we have the metadata but may not have the data for it.

Contributor @richardartoul (Oct 18, 2018):

Yeah, I think IsRetrieved was specific to that policy (you can see the same logic implemented in the IsCachedBlock function). I think we definitely want to get rid of this function and move towards using IsCachedBlock, or honestly at this point we can delete IsCachedBlock as well and only use WasRetrievedFromDisk, since they will have the exact same logic at that point. The only place we use IsCachedBlock is in series.FetchBlocksMetadata, and it's just an optimization there (and not a particularly accurate one at that, since it doesn't work in the recently_read policy).

Also, the only place where I see IsRetrieved being used outside the context of the CacheAllMetadata policy is in the series buffer:

func (b *dbBufferBucket) merge() (mergeResult, error) {
	if !b.needsMerge() {
		// Save unnecessary work
		return mergeResult{}, nil
	}

	merges := 0
	bopts := b.opts.DatabaseBlockOptions()
	encoder := bopts.EncoderPool().Get()
	encoder.Reset(b.start, bopts.DatabaseBlockAllocSize())

	// If we have to merge bootstrapped from disk during a merge then this
	// can make ticking very slow, ensure to notify this bug
	if len(b.bootstrapped) > 0 {
		unretrieved := 0
		for i := range b.bootstrapped {
			if !b.bootstrapped[i].IsRetrieved() {
				unretrieved++
			}
		}
		if unretrieved > 0 {
			log := b.opts.InstrumentOptions().Logger()
			log.Warnf("buffer merging %d unretrieved blocks", unretrieved)
		}
	}

This is a very weird code path that I don't think we would ever follow at this point. If we want to be extra cautious, we could replace that IsRetrieved() call with WasRetrievedFromDisk()/IsCachedBlock and treat a hit as an invariant violation.

@robskillington

Collaborator:

Yeah, let's remove it; I don't think this is plausible now.
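
For reference, a rough sketch of the more cautious option described above: keeping the loop in dbBufferBucket.merge() but flagging blocks via WasRetrievedFromDisk and logging a possible invariant violation. The PR ultimately deletes the check outright (see the buffer.go diff below), so this is illustrative only; it reuses the names from the quoted snippet and assumes the logger exposes Errorf alongside Warnf.

	// Hypothetical alternative (not what this PR does): replace the
	// IsRetrieved() count with WasRetrievedFromDisk() and treat any hit
	// as a possible invariant violation rather than a plain warning.
	if len(b.bootstrapped) > 0 {
		notFromDisk := 0
		for i := range b.bootstrapped {
			if !b.bootstrapped[i].WasRetrievedFromDisk() {
				notFromDisk++
			}
		}
		if notFromDisk > 0 {
			log := b.opts.InstrumentOptions().Logger()
			log.Errorf("invariant violated: buffer merging %d bootstrapped blocks not retrieved from disk", notFromDisk)
		}
	}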

// meaningful in the context of the CacheAllMetadata series caching policy.
IsRetrieved() bool

// WasRetrievedFromDisk returns whether the block was retrieved from storage.
WasRetrievedFromDisk() bool

32 changes: 1 addition & 31 deletions src/dbnode/storage/bootstrap/bootstrapper/fs/source.go
@@ -506,15 +506,6 @@ func (s *fileSystemSource) loadShardReadersDataIntoShardResult(
if shardRetrieverMgr != nil {
shardRetriever = shardRetrieverMgr.ShardRetriever(shard)
}
if seriesCachePolicy == series.CacheAllMetadata && shardRetriever == nil {
s.log.WithFields(
xlog.NewField("has-shard-retriever-mgr", shardRetrieverMgr != nil),
xlog.NewField("has-shard-retriever", shardRetriever != nil),
).Errorf("shard retriever missing for shard: %d", shard)
s.markRunResultErrorsAndUnfulfilled(runResult, requestedRanges,
remainingRanges, timesWithErrors)
return
}
}

for _, r := range readers {
@@ -558,8 +549,6 @@ func (s *fileSystemSource) loadShardReadersDataIntoShardResult(
switch seriesCachePolicy {
case series.CacheAll:
validateErr = r.Validate()
case series.CacheAllMetadata:
validateErr = r.ValidateMetadata()
default:
err = fmt.Errorf("invalid series cache policy: %s", seriesCachePolicy.String())
}
@@ -639,15 +628,11 @@ func (s *fileSystemSource) readNextEntryAndRecordBlock(
id ident.ID
tagsIter ident.TagIterator
data checked.Bytes
length int
checksum uint32
err error
)
switch seriesCachePolicy {
case series.CacheAll:
id, tagsIter, data, checksum, err = r.Read()
case series.CacheAllMetadata:
id, tagsIter, length, checksum, err = r.ReadMetadata()
id, tagsIter, data, _, err = r.Read()
default:
err = fmt.Errorf("invalid series cache policy: %s", seriesCachePolicy.String())
}
@@ -683,13 +668,6 @@
case series.CacheAll:
seg := ts.NewSegment(data, nil, ts.FinalizeHead)
seriesBlock.Reset(blockStart, blockSize, seg)
case series.CacheAllMetadata:
metadata := block.RetrievableBlockMetadata{
ID: id,
Length: length,
Checksum: checksum,
}
seriesBlock.ResetRetrievable(blockStart, blockSize, shardRetriever, metadata)
default:
return fmt.Errorf("invalid series cache policy: %s", seriesCachePolicy.String())
}
@@ -914,7 +892,6 @@ func (s *fileSystemSource) read(
runOpts bootstrap.RunOptions,
) (*runResult, error) {
var (
nsID = md.ID()
seriesCachePolicy = s.opts.ResultOptions().SeriesCachePolicy()
blockRetriever block.DatabaseBlockRetriever
res *runResult
@@ -958,13 +935,6 @@
switch seriesCachePolicy {
case series.CacheAll:
// No checks necessary
case series.CacheAllMetadata:
// Need to check block retriever available
if blockRetriever == nil {
return nil, fmt.Errorf(
"missing block retriever when using series cache metadata for namespace: %s",
nsID.String())
}
default:
// Unless we're caching all series (or all series metadata) in memory, we
// return just the availability of the files we have
29 changes: 4 additions & 25 deletions src/dbnode/storage/bootstrap/bootstrapper/peers/source.go
@@ -343,11 +343,8 @@ func (s *peersSource) logFetchBootstrapBlocksFromPeersOutcome(
// the series will either be held in memory, or removed from memory once
// flushing has completed.
// Once everything has been flushed to disk then depending on the series
// caching policy the function is either done, or in the case of the
// CacheAllMetadata policy we loop through every series and make every block
// retrievable (so that we can retrieve data for the blocks that we're caching
// the metadata for).
// In addition, if the caching policy is not CacheAll or CacheAllMetadata, then
// caching policy the function is either done.
Contributor:

This sentence terminates too early now

// In addition, if the caching policy is not CacheAll, then
// at the end we remove all the series objects from the shard result as well
// (since all their corresponding blocks have been removed anyways) to prevent
// a huge memory spike caused by adding lots of unused series to the Shard
@@ -364,14 +361,10 @@ func (s *peersSource) flush(
var (
ropts = nsMetadata.Options().RetentionOptions()
blockSize = ropts.BlockSize()
shardRetriever = shardRetrieverMgr.ShardRetriever(shard)
tmpCtx = context.NewContext()
seriesCachePolicy = s.opts.ResultOptions().SeriesCachePolicy()
persistConfig = opts.PersistConfig()
)
if seriesCachePolicy == series.CacheAllMetadata && shardRetriever == nil {
return fmt.Errorf("shard retriever missing for shard: %d", shard)
}

for start := tr.Start; start.Before(tr.End); start = start.Add(blockSize) {
prepareOpts := persist.DataPrepareOptions{
@@ -440,18 +433,6 @@
case series.CacheAll:
// Leave the blocks in the shard result, we need to return all blocks
// so we can cache in memory
case series.CacheAllMetadata:
// NB(r): We can now make the flushed blocks retrievable, note that we
// explicitly perform another loop here and lookup the block again
// to avoid a large expensive allocation to hold onto the blocks
// that we just flushed that would have to be pooled.
// We are explicitly trading CPU time here for lower GC pressure.
metadata := block.RetrievableBlockMetadata{
ID: s.ID,
Length: bl.Len(),
Checksum: checksum,
}
bl.ResetRetrievable(start, blockSize, shardRetriever, metadata)
default:
// Not caching the series or metadata in memory so finalize the block,
// better to do this as we loop through to make blocks return to the
@@ -486,15 +467,13 @@
}
}

// We only want to retain the series metadata in one of three cases:
// We only want to retain the series metadata in one of two cases:
// 1) CacheAll caching policy (because we're expected to cache everything in memory)
// 2) CacheAllMetadata caching policy (because we're expected to cache all metadata in memory)
// 3) PersistConfig.FileSetType is set to FileSetSnapshotType because that means we're bootstrapping
// 2) PersistConfig.FileSetType is set to FileSetSnapshotType because that means we're bootstrapping
// an active block that we'll want to perform a flush on later, and we're only flushing here for
// the sake of allowing the commit log bootstrapper to be able to recover this data if the node
// goes down in-between this bootstrapper completing and the subsequent flush.
shouldRetainSeriesMetadata := seriesCachePolicy == series.CacheAll ||
seriesCachePolicy == series.CacheAllMetadata ||
persistConfig.FileSetType == persist.FileSetSnapshotType

if !shouldRetainSeriesMetadata {
@@ -232,7 +232,6 @@ func TestPeersSourceReturnsFulfilledAndUnfulfilled(t *testing.T) {

func TestPeersSourceRunWithPersist(t *testing.T) {
for _, cachePolicy := range []series.CachePolicy{
series.CacheAllMetadata,
series.CacheRecentlyRead,
} {
ctrl := gomock.NewController(t)
@@ -406,36 +405,9 @@
require.True(t, r.Unfulfilled()[0].IsEmpty())
require.True(t, r.Unfulfilled()[1].IsEmpty())

if cachePolicy == series.CacheAllMetadata {
assert.Equal(t, 2, len(r.ShardResults()))
require.NotNil(t, r.ShardResults()[0])
require.NotNil(t, r.ShardResults()[1])

block, ok := r.ShardResults()[0].BlockAt(ident.StringID("foo"), start)
require.True(t, ok)
fooBlockChecksum, err := fooBlock.Checksum()
require.NoError(t, err)
assertBlockChecksum(t, fooBlockChecksum, block)
assert.False(t, block.IsRetrieved())

block, ok = r.ShardResults()[0].BlockAt(ident.StringID("bar"), start.Add(ropts.BlockSize()))
require.True(t, ok)
barBlockChecksum, err := barBlock.Checksum()
require.NoError(t, err)
assertBlockChecksum(t, barBlockChecksum, block)
assert.False(t, block.IsRetrieved())

block, ok = r.ShardResults()[1].BlockAt(ident.StringID("baz"), start)
require.True(t, ok)
bazBlockChecksum, err := bazBlock.Checksum()
require.NoError(t, err)
assertBlockChecksum(t, bazBlockChecksum, block)
assert.False(t, block.IsRetrieved())
} else {
assert.Equal(t, 0, len(r.ShardResults()))
require.Nil(t, r.ShardResults()[0])
require.Nil(t, r.ShardResults()[1])
}
assert.Equal(t, 0, len(r.ShardResults()))
require.Nil(t, r.ShardResults()[0])
require.Nil(t, r.ShardResults()[1])

assert.Equal(t, map[string]int{
"foo": 1, "bar": 1, "baz": 1,
15 changes: 0 additions & 15 deletions src/dbnode/storage/series/buffer.go
@@ -769,21 +769,6 @@ func (b *dbBufferBucket) merge() (mergeResult, error) {
encoder := bopts.EncoderPool().Get()
encoder.Reset(b.start, bopts.DatabaseBlockAllocSize())

// If we have to merge bootstrapped from disk during a merge then this
// can make ticking very slow, ensure to notify this bug
if len(b.bootstrapped) > 0 {
unretrieved := 0
for i := range b.bootstrapped {
if !b.bootstrapped[i].IsRetrieved() {
unretrieved++
}
}
if unretrieved > 0 {
log := b.opts.InstrumentOptions().Logger()
log.Warnf("buffer merging %d unretrieved blocks", unretrieved)
}
}

var (
start = b.start
readers = make([]xio.SegmentReader, 0, len(b.encoders)+len(b.bootstrapped))
9 changes: 1 addition & 8 deletions src/dbnode/storage/series/policy.go
@@ -39,11 +39,6 @@ const (
// which requires loading all into cache on bootstrap and never
// expiring series from memory until expired from retention.
CacheAll
// CacheAllMetadata specifies that all series metadata but not the
// data itself must be cached at all times and the metadata is never
// expired from memory until expired from retention.
// TODO: Remove this once recently read is production grade.
CacheAllMetadata
// CacheRecentlyRead specifies that series that are recently read
// must be cached, configurable by the namespace block expiry after
// not accessed period.
@@ -59,7 +54,7 @@

// ValidCachePolicies returns the valid series cache policies.
func ValidCachePolicies() []CachePolicy {
return []CachePolicy{CacheNone, CacheAll, CacheAllMetadata, CacheRecentlyRead, CacheLRU}
return []CachePolicy{CacheNone, CacheAll, CacheRecentlyRead, CacheLRU}
}

func (p CachePolicy) String() string {
@@ -68,8 +63,6 @@ func (p CachePolicy) String() string {
return "none"
case CacheAll:
return "all"
case CacheAllMetadata:
return "all_metadata"
case CacheRecentlyRead:
return "recently_read"
case CacheLRU:
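One practical consequence of this removal: "all_metadata" no longer appears in ValidCachePolicies or String(), so any configuration still specifying it should now fail validation. A minimal sketch of how a caller might resolve a configured policy string; parseCachePolicy is a hypothetical helper, not part of this diff:

// parseCachePolicy is a hypothetical helper that maps a configured string
// back onto a CachePolicy using the String() values above; with
// CacheAllMetadata gone, "all_metadata" falls through to the error case.
func parseCachePolicy(v string) (series.CachePolicy, error) {
	for _, p := range series.ValidCachePolicies() {
		if p.String() == v {
			return p, nil
		}
	}
	return 0, fmt.Errorf("invalid series cache policy '%s', valid policies: %v",
		v, series.ValidCachePolicies())
}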
4 changes: 0 additions & 4 deletions src/dbnode/storage/series/reader.go
@@ -140,8 +140,6 @@ func (r Reader) readersWithBlocksMapAndBuffer(
switch {
case cachePolicy == CacheAll:
// No-op, block metadata should have been in-memory
case cachePolicy == CacheAllMetadata:
// No-op, block metadata should have been in-memory
case r.retriever != nil:
// Try to stream from disk
if r.retriever.IsBlockRetrievable(blockAt) {
@@ -213,8 +211,6 @@ func (r Reader) fetchBlocksWithBlocksMapAndBuffer(
switch {
case cachePolicy == CacheAll:
// No-op, block metadata should have been in-memory
case cachePolicy == CacheAllMetadata:
// No-op, block metadata should have been in-memory
case r.retriever != nil:
// Try to stream from disk
if r.retriever.IsBlockRetrievable(start) {
35 changes: 3 additions & 32 deletions src/dbnode/storage/series/series.go
@@ -202,12 +202,6 @@ func (s *dbSeries) updateBlocksWithLock() (updateBlocksResult, error) {
continue
}

if cachePolicy == CacheAllMetadata && !currBlock.IsRetrieved() {
// Already unwired
result.UnwiredBlocks++
continue
}

// Potentially unwire
var unwired, shouldUnwire bool
// IsBlockRetrievable makes sure that the block has been flushed. This
@@ -217,9 +211,6 @@
switch cachePolicy {
case CacheNone:
shouldUnwire = true
case CacheAllMetadata:
// Apply RecentlyRead logic (CacheAllMetadata is being removed soon)
fallthrough
case CacheRecentlyRead:
sinceLastRead := now.Sub(currBlock.LastReadTime())
shouldUnwire = sinceLastRead >= wiredTimeout
@@ -235,29 +226,9 @@
}

if shouldUnwire {
switch cachePolicy {
Contributor:

So much cleanup :D

case CacheAllMetadata:
// Keep the metadata but remove contents

// NB(r): Each block needs shared ref to the series ID
// or else each block needs to have a copy of the ID
id := s.id
checksum, err := currBlock.Checksum()
if err != nil {
return result, err
}
metadata := block.RetrievableBlockMetadata{
ID: id,
Length: currBlock.Len(),
Checksum: checksum,
}
currBlock.ResetRetrievable(start, currBlock.BlockSize(), retriever, metadata)
default:
// Remove the block and it will be looked up later
s.blocks.RemoveBlockAt(start)
currBlock.Close()
}

// Remove the block and it will be looked up later
s.blocks.RemoveBlockAt(start)
currBlock.Close()
unwired = true
result.madeUnwiredBlocks++
}