Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dbnode] Cache the results of the commitlog bootstapper between runs #2635

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions src/dbnode/storage/bootstrap/bootstrapper/commitlog/source.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ type commitLogSource struct {
newReaderFn newReaderFn

metrics commitLogSourceMetrics

// Cache the results so the commit log is not read twice. This source is special since it reads the entire
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: the commitlog bootstrapper vs this source

// time series range, irrespective of the requested time range. This is required since the commit log may hold
// cold writes that have not been committed to a cold block. Another source might report it fulfilled a cold block,
// but it could be missing writes only available in the commit log.
//
// The bootstrapper is single threaded so we don't need a mutex for this.
bootstrapResults bootstrap.NamespaceResults
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the lifecycle of this? Any chance a downstream consumer will release the results in this map between te two reads?

}

type bootstrapNamespace struct {
Expand Down Expand Up @@ -175,6 +183,12 @@ func (s *commitLogSource) Read(
ctx, span, _ := ctx.StartSampledTraceSpan(tracepoint.BootstrapperCommitLogSourceRead)
defer span.Finish()

// bail early if this source already ran before.
if s.bootstrapResults.Results != nil {
s.log.Info("the entire range of the commit has already been read. returning previous results")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: maybe make this a debug log since this will get logged every time?

return s.bootstrapResults, nil
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do this before the span is started? Currently we'll have a lot of 0 length spans if the cache is hit


var (
// Emit bootstrapping gauge for duration of ReadData.
doneReadingData = s.metrics.emitBootstrapping()
Expand Down Expand Up @@ -521,9 +535,8 @@ func (s *commitLogSource) Read(
return bootstrap.NamespaceResults{}, err
}

bootstrapResult := bootstrap.NamespaceResults{
Results: bootstrap.NewNamespaceResultsMap(bootstrap.NamespaceResultsMapOptions{}),
}
s.bootstrapResults.Results = bootstrap.NewNamespaceResultsMap(bootstrap.NamespaceResultsMapOptions{})

for _, ns := range namespaceResults {
id := ns.namespace.Metadata.ID()
dataResult := result.NewDataBootstrapResult()
Expand All @@ -539,15 +552,15 @@ func (s *commitLogSource) Read(
indexResult = shardTimeRanges.ToUnfulfilledIndexResult()
}
}
bootstrapResult.Results.Set(id, bootstrap.NamespaceResult{
s.bootstrapResults.Results.Set(id, bootstrap.NamespaceResult{
Metadata: ns.namespace.Metadata,
Shards: ns.namespace.Shards,
DataResult: dataResult,
IndexResult: indexResult,
})
}

return bootstrapResult, nil
return s.bootstrapResults, nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to make sure these bootstrap results won't get loaded twice?

}

func (s *commitLogSource) snapshotFilesByShard(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,66 @@ func TestReadEmpty(t *testing.T) {
tester.EnsureNoWrites()
}

func TestCachedResults(t *testing.T) {
opts := testDefaultOpts
md := testNsMetadata(t)

nsCtx := namespace.NewContextFrom(md)

src := newCommitLogSource(opts, fs.Inspection{}).(*commitLogSource)

blockSize := md.Options().RetentionOptions().BlockSize()
now := time.Now()
start := now.Truncate(blockSize).Add(-blockSize)
end := now.Truncate(blockSize)

ranges := xtime.NewRanges(xtime.Range{Start: start, End: end})

foo := ts.Series{Namespace: nsCtx.ID, Shard: 0, ID: ident.StringID("foo")}
bar := ts.Series{Namespace: nsCtx.ID, Shard: 1, ID: ident.StringID("bar")}
baz := ts.Series{Namespace: nsCtx.ID, Shard: 2, ID: ident.StringID("baz")}

values := testValues{
{foo, start, 1.0, xtime.Second, nil},
{foo, start.Add(1 * time.Minute), 2.0, xtime.Second, nil},
{bar, start.Add(2 * time.Minute), 1.0, xtime.Second, nil},
{bar, start.Add(3 * time.Minute), 2.0, xtime.Second, nil},
// "baz" is in shard 2 and should not be returned
{baz, start.Add(4 * time.Minute), 1.0, xtime.Second, nil},
}

iterated := false
src.newIteratorFn = func(
_ commitlog.IteratorOpts,
) (commitlog.Iterator, []commitlog.ErrorWithPath, error) {
if iterated == true {
return nil, nil, errors.New("expected results to be cached")
}
iterated = true
return newTestCommitLogIterator(values, nil), nil, nil
}

targetRanges := result.NewShardTimeRanges().Set(0, ranges).Set(1, ranges)
tester := bootstrap.BuildNamespacesTester(t, testDefaultRunOpts, targetRanges, md)
defer tester.Finish()

tester.TestReadWith(src)
tester.TestUnfulfilledForNamespaceIsEmpty(md)

read := tester.EnsureDumpWritesForNamespace(md)
require.Equal(t, 2, len(read))
enforceValuesAreCorrect(t, values[:4], read)
tester.EnsureNoLoadedBlocks()

tester.TestReadWith(src)
tester.TestUnfulfilledForNamespaceIsEmpty(md)

read = tester.EnsureDumpWritesForNamespace(md)
require.Equal(t, 2, len(read))
enforceValuesAreCorrect(t, values[:4], read)
tester.EnsureNoLoadedBlocks()
}

func TestReadErrorOnNewIteratorError(t *testing.T) {
opts := testDefaultOpts
src := newCommitLogSource(opts, fs.Inspection{}).(*commitLogSource)
Expand Down