From 7d99d76569b7b5b634cf2af2673645bca6c6e37d Mon Sep 17 00:00:00 2001 From: Nate Broyles Date: Mon, 4 Jan 2021 15:45:03 -0500 Subject: [PATCH 1/3] Add support for reading encoded metadata from an index Reading raw document metadata from an index can end up being rather expensive for metadata with a lot of tags. This commit introduces the concept of encoded metadata, which wrap the relevant section of bytes and provides an efficient reader to retrieve the non-encoded metadata without ballooning memory usage. Additionally, introduce a concept of a document that wraps either raw metadata or encoded metadata, which can be used regardless of whether an index segment is backed by metadata (from memory) or encoded metadata (read from disk) --- src/m3ninx/doc/document.go | 44 +++++++ .../index/segment/fst/encoding/docs/data.go | 110 +++++++++++++++++ .../segment/fst/encoding/docs/data_test.go | 114 +++++++++++------- .../index/segment/fst/encoding/encoding.go | 51 +++++--- 4 files changed, 264 insertions(+), 55 deletions(-) diff --git a/src/m3ninx/doc/document.go b/src/m3ninx/doc/document.go index bb92d6525d..7dc3f04a36 100644 --- a/src/m3ninx/doc/document.go +++ b/src/m3ninx/doc/document.go @@ -212,3 +212,47 @@ func (ds Documents) Less(i, j int) bool { func (ds Documents) Swap(i, j int) { ds[i], ds[j] = ds[j], ds[i] } + +// Encoded is a serialized document metadata. +type Encoded struct { + Bytes []byte +} + +// Document contains either metadata or an encoded metadata +// but never both. +type Document struct { + metadata Metadata + encoded Encoded + + hasMetadata bool +} + +// NewDocumentFromMetadata creates a Document from a Metadata. +func NewDocumentFromMetadata(m Metadata) Document { + return Document{metadata: m, hasMetadata: true} +} + +// NewDocumentFromEncoded creates a Document from an Encoded. +func NewDocumentFromEncoded(e Encoded) Document { + return Document{encoded: e} +} + +// Metadata returns the metadata it contains, if it has one. Otherwise returns an empty metadata +// and false. +func (d *Document) Metadata() (Metadata, bool) { + if d.hasMetadata { + return d.metadata, true + } + + return Metadata{}, false +} + +// Encoded returns the encoded metadata it contains, if it has one. Otherwise returns an +// empty encoded metadata and false. +func (d *Document) Encoded() (Encoded, bool) { + if !d.hasMetadata { + return d.encoded, true + } + + return Encoded{}, false +} diff --git a/src/m3ninx/index/segment/fst/encoding/docs/data.go b/src/m3ninx/index/segment/fst/encoding/docs/data.go index b702cfacfb..00092152b3 100644 --- a/src/m3ninx/index/segment/fst/encoding/docs/data.go +++ b/src/m3ninx/index/segment/fst/encoding/docs/data.go @@ -21,6 +21,7 @@ package docs import ( + "errors" "fmt" "io" @@ -128,3 +129,112 @@ func (r *DataReader) Read(offset uint64) (doc.Metadata, error) { return d, nil } + +// EncodedDataReader is a reader for the data file for encoded document metadata. +type EncodedDataReader struct { + data []byte +} + +// NewEncodedDataReader returns a new EncodedDataReader. +func NewEncodedDataReader(data []byte) *EncodedDataReader { + return &EncodedDataReader{ + data: data, + } +} + +// Read reads a doc.Encoded from a data stream starting at the specified offset. +func (e *EncodedDataReader) Read(offset uint64) (doc.Encoded, error) { + if offset >= uint64(len(e.data)) { + return doc.Encoded{}, fmt.Errorf( + "invalid offset: %v is past the end of the data file", offset, + ) + } + + return doc.Encoded{ + Bytes: e.data[int(offset):], + }, nil +} + +// EncodedDocumentReader is a reader for reading documents from encoded metadata. +type EncodedDocumentReader struct { + currFields []doc.Field +} + +// NewEncodedDocumentReader returns a new EncodedDocumentReader. +func NewEncodedDocumentReader() *EncodedDocumentReader { + return &EncodedDocumentReader{} +} + +// Read reads a doc.Metadata from a doc.Encoded. Returned doc.Metadata should be +// processed before calling Read again as the underlying array pointed to by the Fields +// slice will be updated. This approach avoids allocating a new slice with a new backing +// array for every document processed, unlike (*DataReader).Read +func (r *EncodedDocumentReader) Read(encoded doc.Encoded) (doc.Metadata, error) { + for i := range r.currFields { + r.currFields[i] = doc.Field{} + } + r.currFields = r.currFields[:0] + id, buf, err := encoding.ReadBytes(encoded.Bytes) + if err != nil { + return doc.Metadata{}, err + } + + x, buf, err := encoding.ReadUvarint(buf) + if err != nil { + return doc.Metadata{}, err + } + n := int(x) + + var name, val []byte + for i := 0; i < n; i++ { + name, buf, err = encoding.ReadBytes(buf) + if err != nil { + return doc.Metadata{}, err + } + val, buf, err = encoding.ReadBytes(buf) + if err != nil { + return doc.Metadata{}, err + } + r.currFields = append(r.currFields, doc.Field{ + Name: name, + Value: val, + }) + } + + return doc.Metadata{ + ID: id, + Fields: r.currFields, + }, nil +} + +// ReadEncodedDocumentID reads the document ID from the encoded document metadata. +func ReadEncodedDocumentID(encoded doc.Encoded) ([]byte, error) { + id, _, err := encoding.ReadBytes(encoded.Bytes) + return id, err +} + +// GetFromDocument retrieves a doc.Metadata from a doc.Document. +func GetFromDocument(document doc.Document, reader *EncodedDocumentReader) (doc.Metadata, error) { + if d, ok := document.Metadata(); ok { + return d, nil + } + + if e, ok := document.Encoded(); ok { + return reader.Read(e) + } + + return doc.Metadata{}, errors.New("document does not contain metadata or encoded metadata") +} + +// ReadIDFromDocument reads the document ID from the document. +func ReadIDFromDocument(document doc.Document) ([]byte, error) { + if d, ok := document.Metadata(); ok { + return d.ID, nil + } + + if e, ok := document.Encoded(); ok { + return ReadEncodedDocumentID(e) + } + + return nil, errors.New("document does not contain metadata or encoded metadata") +} diff --git a/src/m3ninx/index/segment/fst/encoding/docs/data_test.go b/src/m3ninx/index/segment/fst/encoding/docs/data_test.go index 1257b406c6..792c107680 100644 --- a/src/m3ninx/index/segment/fst/encoding/docs/data_test.go +++ b/src/m3ninx/index/segment/fst/encoding/docs/data_test.go @@ -30,56 +30,56 @@ import ( "github.com/stretchr/testify/require" ) -func TestStoredFieldsData(t *testing.T) { - tests := []struct { - name string - docs []doc.Metadata - }{ - { - name: "empty document", - docs: []doc.Metadata{ - { - Fields: doc.Fields{}, - }, +var tests = []struct { + name string + docs []doc.Metadata +}{ + { + name: "empty document", + docs: []doc.Metadata{ + { + Fields: doc.Fields{}, }, }, - { - name: "standard documents", - docs: []doc.Metadata{ - { - ID: []byte("831992"), - Fields: []doc.Field{ - { - Name: []byte("fruit"), - Value: []byte("apple"), - }, - { - Name: []byte("color"), - Value: []byte("red"), - }, + }, + { + name: "standard documents", + docs: []doc.Metadata{ + { + ID: []byte("831992"), + Fields: []doc.Field{ + { + Name: []byte("fruit"), + Value: []byte("apple"), + }, + { + Name: []byte("color"), + Value: []byte("red"), }, }, - { - ID: []byte("080392"), - Fields: []doc.Field{ - { - Name: []byte("fruit"), - Value: []byte("banana"), - }, - { - Name: []byte("color"), - Value: []byte("yellow"), - }, + }, + { + ID: []byte("080392"), + Fields: []doc.Field{ + { + Name: []byte("fruit"), + Value: []byte("banana"), + }, + { + Name: []byte("color"), + Value: []byte("yellow"), }, }, }, }, - { - name: "node exporter metrics", - docs: util.MustReadDocs("../../../../../util/testdata/node_exporter.json", 2000), - }, - } + }, + { + name: "node exporter metrics", + docs: util.MustReadDocs("../../../../../util/testdata/node_exporter.json", 2000), + }, +} +func TestStoredFieldsData(t *testing.T) { w := NewDataWriter(nil) for _, test := range tests { t.Run(test.name, func(t *testing.T) { @@ -106,3 +106,35 @@ func TestStoredFieldsData(t *testing.T) { }) } } + +func TestEncodedDataReader(t *testing.T) { + w := NewDataWriter(nil) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var ( + buf = new(bytes.Buffer) + offsets = make([]int, 0) + idx int + ) + w.Reset(buf) + + for i := range test.docs { + n, err := w.Write(test.docs[i]) + require.NoError(t, err) + offsets = append(offsets, idx) + idx += n + } + + dataReader := NewEncodedDataReader(buf.Bytes()) + docReader := NewEncodedDocumentReader() + for i := range test.docs { + encoded, err := dataReader.Read(uint64(offsets[i])) + require.NoError(t, err) + + actual, err := docReader.Read(encoded) + require.NoError(t, err) + require.True(t, actual.Equal(test.docs[i])) + } + }) + } +} diff --git a/src/m3ninx/index/segment/fst/encoding/encoding.go b/src/m3ninx/index/segment/fst/encoding/encoding.go index 09fbb23e4f..9c0f35a809 100644 --- a/src/m3ninx/index/segment/fst/encoding/encoding.go +++ b/src/m3ninx/index/segment/fst/encoding/encoding.go @@ -117,34 +117,57 @@ func (d *Decoder) Uint64() (uint64, error) { // Uvarint reads a variable-sized unsigned integer. func (d *Decoder) Uvarint() (uint64, error) { - x, n := binary.Uvarint(d.buf) - if n == 0 { - return 0, io.ErrShortBuffer - } - if n < 0 { - return 0, errUvarintOverflow + x, buf, err := ReadUvarint(d.buf) + if err != nil { + return 0, err } - d.buf = d.buf[n:] + d.buf = buf return x, nil } // Bytes reads a byte slice from the decoder. func (d *Decoder) Bytes() ([]byte, error) { - x, err := d.Uvarint() + b, buf, err := ReadBytes(d.buf) if err != nil { return nil, err } + d.buf = buf + return b, nil +} + +// ReadUvarint reads a variable-size unsigned integer from a byte slice +// and returns a new slice positioned after the integer that was just read. +func ReadUvarint(buf []byte) (uint64, []byte, error) { + x, n := binary.Uvarint(buf) + if n == 0 { + return 0, nil, io.ErrShortBuffer + } + if n < 0 { + return 0, nil, errUvarintOverflow + } + buf = buf[n:] + return x, buf, nil +} + +// ReadBytes reads an unsigned integer from a byte slice and +// returns that amount of bytes along with a new slice positioned after +// the last byte just read. +func ReadBytes(buf []byte) ([]byte, []byte, error) { + x, buf, err := ReadUvarint(buf) + if err != nil { + return nil, nil, err + } // Verify the length of the slice won't overflow an int. if x > uint64(maxInt) { - return nil, errIntOverflow + return nil, nil, errIntOverflow } n := int(x) - if len(d.buf) < n { - return nil, io.ErrShortBuffer + if len(buf) < n { + return nil, nil, io.ErrShortBuffer } - b := d.buf[:n] - d.buf = d.buf[n:] - return b, nil + b := buf[:n] + buf = buf[n:] + return b, buf, nil } From 3a7e18439479ffb79a1e7fb00fd28de91c1f9807 Mon Sep 17 00:00:00 2001 From: Nate Broyles Date: Mon, 4 Jan 2021 18:00:25 -0500 Subject: [PATCH 2/3] Rename doc.Iterator to doc.MetadataIterator --- src/dbnode/storage/index/block_test.go | 48 +++++------ .../storage/index/read_through_segment.go | 2 +- src/m3ninx/doc/doc_mock.go | 85 ++++++++++++++++++- src/m3ninx/doc/types.go | 10 +-- src/m3ninx/generated/mocks/generate.go | 2 +- src/m3ninx/idx/types.go | 2 +- src/m3ninx/index/index_mock.go | 4 +- .../{iterator.go => metadata_iterator.go} | 4 +- ...ator_test.go => metadata_iterator_test.go} | 12 +-- src/m3ninx/index/segment/fst/segment.go | 4 +- .../index/segment/fst/writer_reader_test.go | 4 +- src/m3ninx/index/segment/mem/reader.go | 2 +- src/m3ninx/index/segment/segment_mock.go | 4 +- src/m3ninx/index/types.go | 4 +- src/m3ninx/search/executor/executor.go | 4 +- src/m3ninx/search/executor/executor_test.go | 2 +- src/m3ninx/search/executor/iterator.go | 6 +- src/m3ninx/search/executor/iterator_test.go | 8 +- src/m3ninx/search/proptest/segment_gen.go | 2 +- src/m3ninx/search/proptest/util.go | 2 +- src/m3ninx/search/search_mock.go | 4 +- src/m3ninx/search/types.go | 2 +- 22 files changed, 147 insertions(+), 70 deletions(-) rename src/m3ninx/index/{iterator.go => metadata_iterator.go} (97%) rename src/m3ninx/index/{iterator_test.go => metadata_iterator_test.go} (98%) diff --git a/src/dbnode/storage/index/block_test.go b/src/dbnode/storage/index/block_test.go index 67f8710f9e..f84914962f 100644 --- a/src/dbnode/storage/index/block_test.go +++ b/src/dbnode/storage/index/block_test.go @@ -452,8 +452,8 @@ func TestBlockQueryAddResultsSegmentsError(t *testing.T) { b.mutableSegments.foregroundSegments = []*readableSeg{newReadableSeg(seg1, testOpts)} b.shardRangesSegmentsByVolumeType = map[idxpersist.IndexVolumeType][]blockShardRangesSegments{ - idxpersist.DefaultIndexVolumeType: []blockShardRangesSegments{ - blockShardRangesSegments{segments: []segment.Segment{seg2, seg3}}, + idxpersist.DefaultIndexVolumeType: { + {segments: []segment.Segment{seg2, seg3}}, }, } @@ -518,7 +518,7 @@ func TestBlockMockQueryExecutorExecIterErr(t *testing.T) { return exec, nil } - dIter := doc.NewMockIterator(ctrl) + dIter := doc.NewMockMetadataIterator(ctrl) gomock.InOrder( exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil), dIter.EXPECT().Next().Return(true), @@ -559,7 +559,7 @@ func TestBlockMockQueryExecutorExecLimit(t *testing.T) { return exec, nil } - dIter := doc.NewMockIterator(ctrl) + dIter := doc.NewMockMetadataIterator(ctrl) gomock.InOrder( exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil), dIter.EXPECT().Next().Return(true), @@ -610,7 +610,7 @@ func TestBlockMockQueryExecutorExecIterCloseErr(t *testing.T) { return exec, nil } - dIter := doc.NewMockIterator(ctrl) + dIter := doc.NewMockMetadataIterator(ctrl) gomock.InOrder( exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil), dIter.EXPECT().Next().Return(false), @@ -649,7 +649,7 @@ func TestBlockMockQuerySeriesLimitNonExhaustive(t *testing.T) { return exec, nil } - dIter := doc.NewMockIterator(ctrl) + dIter := doc.NewMockMetadataIterator(ctrl) gomock.InOrder( exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil), dIter.EXPECT().Next().Return(true), @@ -699,7 +699,7 @@ func TestBlockMockQuerySeriesLimitExhaustive(t *testing.T) { return exec, nil } - dIter := doc.NewMockIterator(ctrl) + dIter := doc.NewMockMetadataIterator(ctrl) gomock.InOrder( exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil), dIter.EXPECT().Next().Return(true), @@ -751,7 +751,7 @@ func TestBlockMockQueryDocsLimitNonExhaustive(t *testing.T) { return exec, nil } - dIter := doc.NewMockIterator(ctrl) + dIter := doc.NewMockMetadataIterator(ctrl) gomock.InOrder( exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil), dIter.EXPECT().Next().Return(true), @@ -801,7 +801,7 @@ func TestBlockMockQueryDocsLimitExhaustive(t *testing.T) { return exec, nil } - dIter := doc.NewMockIterator(ctrl) + dIter := doc.NewMockMetadataIterator(ctrl) gomock.InOrder( exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil), dIter.EXPECT().Next().Return(true), @@ -860,7 +860,7 @@ func TestBlockMockQueryMergeResultsMapLimit(t *testing.T) { _, _, err = results.AddDocuments([]doc.Metadata{testDoc1()}) require.NoError(t, err) - dIter := doc.NewMockIterator(ctrl) + dIter := doc.NewMockMetadataIterator(ctrl) gomock.InOrder( exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil), dIter.EXPECT().Next().Return(true), @@ -911,7 +911,7 @@ func TestBlockMockQueryMergeResultsDupeID(t *testing.T) { _, _, err = results.AddDocuments([]doc.Metadata{testDoc1()}) require.NoError(t, err) - dIter := doc.NewMockIterator(ctrl) + dIter := doc.NewMockMetadataIterator(ctrl) gomock.InOrder( exec.EXPECT().Execute(gomock.Any()).Return(dIter, nil), dIter.EXPECT().Next().Return(true), @@ -1900,8 +1900,8 @@ func TestBlockAggregate(t *testing.T) { require.True(t, exhaustive) assertAggregateResultsMapEquals(t, map[string][]string{ - "f1": []string{"t1", "t2", "t3"}, - "f2": []string{"t1"}, + "f1": {"t1", "t2", "t3"}, + "f2": {"t1"}, }, results) sp.Finish() @@ -1976,7 +1976,7 @@ func TestBlockAggregateNotExhaustive(t *testing.T) { require.False(t, exhaustive) assertAggregateResultsMapEquals(t, map[string][]string{ - "f1": []string{"t1"}, + "f1": {"t1"}, }, results) sp.Finish() @@ -2067,8 +2067,8 @@ func TestBlockE2EInsertAggregate(t *testing.T) { require.NoError(t, err) require.True(t, exhaustive) assertAggregateResultsMapEquals(t, map[string][]string{ - "bar": []string{"baz", "qux"}, - "some": []string{"more", "other"}, + "bar": {"baz", "qux"}, + "some": {"more", "other"}, }, results) results = NewAggregateResults(ident.StringID("ns"), AggregateResultsOptions{ @@ -2085,7 +2085,7 @@ func TestBlockE2EInsertAggregate(t *testing.T) { require.NoError(t, err) require.True(t, exhaustive) assertAggregateResultsMapEquals(t, map[string][]string{ - "bar": []string{"baz", "qux"}, + "bar": {"baz", "qux"}, }, results) results = NewAggregateResults(ident.StringID("ns"), AggregateResultsOptions{ @@ -2162,7 +2162,7 @@ func testDoc1() doc.Metadata { return doc.Metadata{ ID: []byte("foo"), Fields: []doc.Field{ - doc.Field{ + { Name: []byte("bar"), Value: []byte("baz"), }, @@ -2174,11 +2174,11 @@ func testDoc1DupeID() doc.Metadata { return doc.Metadata{ ID: []byte("foo"), Fields: []doc.Field{ - doc.Field{ + { Name: []byte("why"), Value: []byte("not"), }, - doc.Field{ + { Name: []byte("some"), Value: []byte("more"), }, @@ -2190,11 +2190,11 @@ func testDoc2() doc.Metadata { return doc.Metadata{ ID: []byte("something"), Fields: []doc.Field{ - doc.Field{ + { Name: []byte("bar"), Value: []byte("baz"), }, - doc.Field{ + { Name: []byte("some"), Value: []byte("more"), }, @@ -2206,11 +2206,11 @@ func testDoc3() doc.Metadata { return doc.Metadata{ ID: []byte("bar"), Fields: []doc.Field{ - doc.Field{ + { Name: []byte("bar"), Value: []byte("qux"), }, - doc.Field{ + { Name: []byte("some"), Value: []byte("other"), }, diff --git a/src/dbnode/storage/index/read_through_segment.go b/src/dbnode/storage/index/read_through_segment.go index 2de3fb368c..39d6c501ef 100644 --- a/src/dbnode/storage/index/read_through_segment.go +++ b/src/dbnode/storage/index/read_through_segment.go @@ -268,7 +268,7 @@ func (s *readThroughSegmentReader) Doc(id postings.ID) (doc.Metadata, error) { } // Docs is a pass through call, since there's no postings list to cache. -func (s *readThroughSegmentReader) Docs(pl postings.List) (doc.Iterator, error) { +func (s *readThroughSegmentReader) Docs(pl postings.List) (doc.MetadataIterator, error) { return s.reader.Docs(pl) } diff --git a/src/m3ninx/doc/doc_mock.go b/src/m3ninx/doc/doc_mock.go index 25e54975a1..d955067ed1 100644 --- a/src/m3ninx/doc/doc_mock.go +++ b/src/m3ninx/doc/doc_mock.go @@ -1,7 +1,7 @@ // Code generated by MockGen. DO NOT EDIT. // Source: github.com/m3db/m3/src/m3ninx/doc/types.go -// Copyright (c) 2018 Uber Technologies, Inc. +// Copyright (c) 2021 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -30,6 +30,85 @@ import ( "github.com/golang/mock/gomock" ) +// MockMetadataIterator is a mock of MetadataIterator interface +type MockMetadataIterator struct { + ctrl *gomock.Controller + recorder *MockMetadataIteratorMockRecorder +} + +// MockMetadataIteratorMockRecorder is the mock recorder for MockMetadataIterator +type MockMetadataIteratorMockRecorder struct { + mock *MockMetadataIterator +} + +// NewMockMetadataIterator creates a new mock instance +func NewMockMetadataIterator(ctrl *gomock.Controller) *MockMetadataIterator { + mock := &MockMetadataIterator{ctrl: ctrl} + mock.recorder = &MockMetadataIteratorMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use +func (m *MockMetadataIterator) EXPECT() *MockMetadataIteratorMockRecorder { + return m.recorder +} + +// Next mocks base method +func (m *MockMetadataIterator) Next() bool { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Next") + ret0, _ := ret[0].(bool) + return ret0 +} + +// Next indicates an expected call of Next +func (mr *MockMetadataIteratorMockRecorder) Next() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Next", reflect.TypeOf((*MockMetadataIterator)(nil).Next)) +} + +// Current mocks base method +func (m *MockMetadataIterator) Current() Metadata { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Current") + ret0, _ := ret[0].(Metadata) + return ret0 +} + +// Current indicates an expected call of Current +func (mr *MockMetadataIteratorMockRecorder) Current() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Current", reflect.TypeOf((*MockMetadataIterator)(nil).Current)) +} + +// Err mocks base method +func (m *MockMetadataIterator) Err() error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Err") + ret0, _ := ret[0].(error) + return ret0 +} + +// Err indicates an expected call of Err +func (mr *MockMetadataIteratorMockRecorder) Err() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Err", reflect.TypeOf((*MockMetadataIterator)(nil).Err)) +} + +// Close mocks base method +func (m *MockMetadataIterator) Close() error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Close") + ret0, _ := ret[0].(error) + return ret0 +} + +// Close indicates an expected call of Close +func (mr *MockMetadataIteratorMockRecorder) Close() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Close", reflect.TypeOf((*MockMetadataIterator)(nil).Close)) +} + // MockIterator is a mock of Iterator interface type MockIterator struct { ctrl *gomock.Controller @@ -68,10 +147,10 @@ func (mr *MockIteratorMockRecorder) Next() *gomock.Call { } // Current mocks base method -func (m *MockIterator) Current() Metadata { +func (m *MockIterator) Current() Document { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Current") - ret0, _ := ret[0].(Metadata) + ret0, _ := ret[0].(Document) return ret0 } diff --git a/src/m3ninx/doc/types.go b/src/m3ninx/doc/types.go index 5593b07f24..451daf84d4 100644 --- a/src/m3ninx/doc/types.go +++ b/src/m3ninx/doc/types.go @@ -20,14 +20,14 @@ package doc -// Iterator provides an iterator over a collection of documents. It is NOT safe for multiple -// goroutines to invoke methods on an Iterator simultaneously. -type Iterator interface { - // Next returns a bool indicating if the iterator has any more documents +// MetadataIterator provides an iterator over a collection of document metadata. It is NOT +// safe for multiple goroutines to invoke methods on an MetadataIterator simultaneously. +type MetadataIterator interface { + // Next returns a bool indicating if the iterator has any more metadata // to return. Next() bool - // Current returns the current document. It is only safe to call Current immediately + // Current returns the current metadata. It is only safe to call Current immediately // after a call to Next confirms there are more elements remaining. The Metadata // returned from Current is only valid until the following call to Next(). Callers // should copy the Metadata if they need it live longer. diff --git a/src/m3ninx/generated/mocks/generate.go b/src/m3ninx/generated/mocks/generate.go index 33756ac9a4..418a43997c 100644 --- a/src/m3ninx/generated/mocks/generate.go +++ b/src/m3ninx/generated/mocks/generate.go @@ -30,4 +30,4 @@ package mocks // mockgen rules for generating mocks (reflection mode) //go:generate sh -c "mockgen -package=mem -destination=$GOPATH/src/github.com/m3db/m3/src/m3ninx/index/segment/mem/mem_mock.go github.com/m3db/m3/src/m3ninx/index/segment/mem ReadableSegment" //go:generate sh -c "mockgen -package=fst -destination=$GOPATH/src/github.com/m3db/m3/src/m3ninx/index/segment/fst/fst_mock.go github.com/m3db/m3/src/m3ninx/index/segment/fst Writer,Segment" -//go:generate sh -c "mockgen -package=index -destination=$GOPATH/src/github.com/m3db/m3/src/m3ninx/index/index_mock.go github.com/m3db/m3/src/m3ninx/index Reader,DocRetriever" +//go:generate sh -c "mockgen -package=index -destination=$GOPATH/src/github.com/m3db/m3/src/m3ninx/index/index_mock.go github.com/m3db/m3/src/m3ninx/index Reader,DocRetriever,MetadataRetriever" diff --git a/src/m3ninx/idx/types.go b/src/m3ninx/idx/types.go index 11fe2609ad..ceee1cbfe5 100644 --- a/src/m3ninx/idx/types.go +++ b/src/m3ninx/idx/types.go @@ -38,7 +38,7 @@ type Index interface { // Searcher provides search over a point-in-time view of an index. type Searcher interface { - Search(q Query) (doc.Iterator, error) + Search(q Query) (doc.MetadataIterator, error) Close() error } diff --git a/src/m3ninx/index/index_mock.go b/src/m3ninx/index/index_mock.go index 4888262daf..eaae0e8ee9 100644 --- a/src/m3ninx/index/index_mock.go +++ b/src/m3ninx/index/index_mock.go @@ -101,10 +101,10 @@ func (mr *MockReaderMockRecorder) Doc(arg0 interface{}) *gomock.Call { } // Docs mocks base method -func (m *MockReader) Docs(arg0 postings.List) (doc.Iterator, error) { +func (m *MockReader) Docs(arg0 postings.List) (doc.MetadataIterator, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Docs", arg0) - ret0, _ := ret[0].(doc.Iterator) + ret0, _ := ret[0].(doc.MetadataIterator) ret1, _ := ret[1].(error) return ret0, ret1 } diff --git a/src/m3ninx/index/iterator.go b/src/m3ninx/index/metadata_iterator.go similarity index 97% rename from src/m3ninx/index/iterator.go rename to src/m3ninx/index/metadata_iterator.go index 464e430517..aa464e3cf2 100644 --- a/src/m3ninx/index/iterator.go +++ b/src/m3ninx/index/metadata_iterator.go @@ -27,9 +27,7 @@ import ( "github.com/m3db/m3/src/m3ninx/postings" ) -var ( - errIteratorClosed = errors.New("iterator has been closed") -) +var errIteratorClosed = errors.New("iterator has been closed") type idDocIterator struct { retriever DocRetriever diff --git a/src/m3ninx/index/iterator_test.go b/src/m3ninx/index/metadata_iterator_test.go similarity index 98% rename from src/m3ninx/index/iterator_test.go rename to src/m3ninx/index/metadata_iterator_test.go index d0da569c2f..0cfe820f39 100644 --- a/src/m3ninx/index/iterator_test.go +++ b/src/m3ninx/index/metadata_iterator_test.go @@ -23,11 +23,11 @@ package index import ( "testing" - "github.com/m3db/m3/src/m3ninx/doc" - "github.com/m3db/m3/src/m3ninx/postings" - "github.com/golang/mock/gomock" "github.com/stretchr/testify/require" + + "github.com/m3db/m3/src/m3ninx/doc" + "github.com/m3db/m3/src/m3ninx/postings" ) func TestIterator(t *testing.T) { @@ -42,7 +42,7 @@ func TestIterator(t *testing.T) { id: 42, doc: doc.Metadata{ Fields: []doc.Field{ - doc.Field{ + { Name: []byte("apple"), Value: []byte("red"), }, @@ -53,7 +53,7 @@ func TestIterator(t *testing.T) { id: 53, doc: doc.Metadata{ Fields: []doc.Field{ - doc.Field{ + { Name: []byte("banana"), Value: []byte("yellow"), }, @@ -64,7 +64,7 @@ func TestIterator(t *testing.T) { id: 81, doc: doc.Metadata{ Fields: []doc.Field{ - doc.Field{ + { Name: []byte("carrot"), Value: []byte("orange"), }, diff --git a/src/m3ninx/index/segment/fst/segment.go b/src/m3ninx/index/segment/fst/segment.go index 37091f6965..825753fe89 100644 --- a/src/m3ninx/index/segment/fst/segment.go +++ b/src/m3ninx/index/segment/fst/segment.go @@ -602,7 +602,7 @@ func (r *fsSegment) docNotClosedMaybeFinalizedWithRLock(id postings.ID) (doc.Met func (r *fsSegment) docsNotClosedMaybeFinalizedWithRLock( retriever index.DocRetriever, pl postings.List, -) (doc.Iterator, error) { +) (doc.MetadataIterator, error) { // NB(r): Not closed, but could be finalized (i.e. closed segment reader) // calling match field after this segment is finalized. if r.finalized { @@ -908,7 +908,7 @@ func (sr *fsSegmentReader) Doc(id postings.ID) (doc.Metadata, error) { return pl, err } -func (sr *fsSegmentReader) Docs(pl postings.List) (doc.Iterator, error) { +func (sr *fsSegmentReader) Docs(pl postings.List) (doc.MetadataIterator, error) { if sr.closed { return nil, errReaderClosed } diff --git a/src/m3ninx/index/segment/fst/writer_reader_test.go b/src/m3ninx/index/segment/fst/writer_reader_test.go index 2c785fabcf..c4a6198a15 100644 --- a/src/m3ninx/index/segment/fst/writer_reader_test.go +++ b/src/m3ninx/index/segment/fst/writer_reader_test.go @@ -589,7 +589,7 @@ func assertSliceOfByteSlicesEqual(t *testing.T, a, b [][]byte) { require.Equal(t, a, b) } -func assertDocsEqual(t *testing.T, a, b doc.Iterator) { +func assertDocsEqual(t *testing.T, a, b doc.MetadataIterator) { aDocs, err := collectDocs(a) require.NoError(t, err) bDocs, err := collectDocs(b) @@ -647,7 +647,7 @@ func assertPostingsList(t *testing.T, l postings.List, exp []postings.ID) { require.Fail(t, msg) } -func collectDocs(iter doc.Iterator) ([]doc.Metadata, error) { +func collectDocs(iter doc.MetadataIterator) ([]doc.Metadata, error) { var docs []doc.Metadata for iter.Next() { docs = append(docs, iter.Current()) diff --git a/src/m3ninx/index/segment/mem/reader.go b/src/m3ninx/index/segment/mem/reader.go index 35993ff0f7..b3c5a2cdde 100644 --- a/src/m3ninx/index/segment/mem/reader.go +++ b/src/m3ninx/index/segment/mem/reader.go @@ -138,7 +138,7 @@ func (r *reader) Doc(id postings.ID) (doc.Metadata, error) { return r.segment.getDoc(id) } -func (r *reader) Docs(pl postings.List) (doc.Iterator, error) { +func (r *reader) Docs(pl postings.List) (doc.MetadataIterator, error) { r.RLock() defer r.RUnlock() if r.closed { diff --git a/src/m3ninx/index/segment/segment_mock.go b/src/m3ninx/index/segment/segment_mock.go index 3fc758bb7f..e5610b598d 100644 --- a/src/m3ninx/index/segment/segment_mock.go +++ b/src/m3ninx/index/segment/segment_mock.go @@ -257,10 +257,10 @@ func (mr *MockReaderMockRecorder) MatchAll() *gomock.Call { } // Docs mocks base method -func (m *MockReader) Docs(pl postings.List) (doc.Iterator, error) { +func (m *MockReader) Docs(pl postings.List) (doc.MetadataIterator, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Docs", pl) - ret0, _ := ret[0].(doc.Iterator) + ret0, _ := ret[0].(doc.MetadataIterator) ret1, _ := ret[1].(error) return ret0, ret1 } diff --git a/src/m3ninx/index/types.go b/src/m3ninx/index/types.go index 99e4de0c6e..ac9f80119a 100644 --- a/src/m3ninx/index/types.go +++ b/src/m3ninx/index/types.go @@ -78,7 +78,7 @@ type Readable interface { // Docs returns an iterator over the documents whose IDs are in the provided // postings list. - Docs(pl postings.List) (doc.Iterator, error) + Docs(pl postings.List) (doc.MetadataIterator, error) // AllDocs returns an iterator over the documents known to the Reader. AllDocs() (IDDocIterator, error) @@ -103,7 +103,7 @@ type DocRetriever interface { // IDDocIterator is an extented documents Iterator which can also return the postings // ID of the current document. type IDDocIterator interface { - doc.Iterator + doc.MetadataIterator // PostingsID returns the current document postings ID. PostingsID() postings.ID diff --git a/src/m3ninx/search/executor/executor.go b/src/m3ninx/search/executor/executor.go index e5f606a82f..79eca270fe 100644 --- a/src/m3ninx/search/executor/executor.go +++ b/src/m3ninx/search/executor/executor.go @@ -33,7 +33,7 @@ var ( errExecutorClosed = errors.New("executor is closed") ) -type newIteratorFn func(s search.Searcher, rs index.Readers) (doc.Iterator, error) +type newIteratorFn func(s search.Searcher, rs index.Readers) (doc.MetadataIterator, error) type executor struct { sync.RWMutex @@ -52,7 +52,7 @@ func NewExecutor(rs index.Readers) search.Executor { } } -func (e *executor) Execute(q search.Query) (doc.Iterator, error) { +func (e *executor) Execute(q search.Query) (doc.MetadataIterator, error) { e.RLock() defer e.RUnlock() if e.closed { diff --git a/src/m3ninx/search/executor/executor_test.go b/src/m3ninx/search/executor/executor_test.go index 15d7c90f34..ce8ef85a04 100644 --- a/src/m3ninx/search/executor/executor_test.go +++ b/src/m3ninx/search/executor/executor_test.go @@ -58,7 +58,7 @@ func TestExecutor(t *testing.T) { e := NewExecutor(rs).(*executor) // Override newIteratorFn to return test iterator. - e.newIteratorFn = func(_ search.Searcher, _ index.Readers) (doc.Iterator, error) { + e.newIteratorFn = func(_ search.Searcher, _ index.Readers) (doc.MetadataIterator, error) { return newTestIterator(), nil } diff --git a/src/m3ninx/search/executor/iterator.go b/src/m3ninx/search/executor/iterator.go index da310902e7..51d65b0614 100644 --- a/src/m3ninx/search/executor/iterator.go +++ b/src/m3ninx/search/executor/iterator.go @@ -32,13 +32,13 @@ type iterator struct { idx int currDoc doc.Metadata - currIter doc.Iterator + currIter doc.MetadataIterator err error closed bool } -func newIterator(s search.Searcher, rs index.Readers) (doc.Iterator, error) { +func newIterator(s search.Searcher, rs index.Readers) (doc.MetadataIterator, error) { it := &iterator{ searcher: s, readers: rs, @@ -110,7 +110,7 @@ func (it *iterator) Close() error { // nextIter gets the next document iterator by getting the next postings list from // the it's searcher and then getting the documents for that postings list from the // corresponding reader associated with that postings list. -func (it *iterator) nextIter() (doc.Iterator, bool, error) { +func (it *iterator) nextIter() (doc.MetadataIterator, bool, error) { it.idx++ if it.idx >= len(it.readers) { return nil, false, nil diff --git a/src/m3ninx/search/executor/iterator_test.go b/src/m3ninx/search/executor/iterator_test.go index 1e56f4abd7..28cf95f021 100644 --- a/src/m3ninx/search/executor/iterator_test.go +++ b/src/m3ninx/search/executor/iterator_test.go @@ -71,8 +71,8 @@ func TestIterator(t *testing.T) { }, } - firstDocIter := doc.NewMockIterator(mockCtrl) - secondDocIter := doc.NewMockIterator(mockCtrl) + firstDocIter := doc.NewMockMetadataIterator(mockCtrl) + secondDocIter := doc.NewMockMetadataIterator(mockCtrl) gomock.InOrder( firstDocIter.EXPECT().Next().Return(true), firstDocIter.EXPECT().Current().Return(docs[0]), @@ -92,8 +92,8 @@ func TestIterator(t *testing.T) { firstReader := index.NewMockReader(mockCtrl) secondReader := index.NewMockReader(mockCtrl) gomock.InOrder( - firstReader.EXPECT().Docs(firstPL).Return(firstDocIter, nil), - secondReader.EXPECT().Docs(secondPL).Return(secondDocIter, nil), + firstReader.EXPECT().MetadataIterator(firstPL).Return(firstDocIter, nil), + secondReader.EXPECT().MetadataIterator(secondPL).Return(secondDocIter, nil), ) searcher := search.NewMockSearcher(mockCtrl) diff --git a/src/m3ninx/search/proptest/segment_gen.go b/src/m3ninx/search/proptest/segment_gen.go index 41ef29daa7..aa3bf83c4e 100644 --- a/src/m3ninx/search/proptest/segment_gen.go +++ b/src/m3ninx/search/proptest/segment_gen.go @@ -40,7 +40,7 @@ var ( fstOptions = fst.NewOptions() ) -func collectDocs(iter doc.Iterator) ([]doc.Metadata, error) { +func collectDocs(iter doc.MetadataIterator) ([]doc.Metadata, error) { var docs []doc.Metadata for iter.Next() { docs = append(docs, iter.Current()) diff --git a/src/m3ninx/search/proptest/util.go b/src/m3ninx/search/proptest/util.go index 2e2fd9d71c..60dd08e341 100644 --- a/src/m3ninx/search/proptest/util.go +++ b/src/m3ninx/search/proptest/util.go @@ -43,7 +43,7 @@ func newDocumentIteratorMatcher(docs ...doc.Metadata) (*documentIteratorMatcher, } // Matches returns whether the provided iterator matches the collection of provided docs. -func (m *documentIteratorMatcher) Matches(i doc.Iterator) error { +func (m *documentIteratorMatcher) Matches(i doc.MetadataIterator) error { pendingDocIDs := make(map[string]doc.Metadata, len(m.expectedDocs)) for id := range m.expectedDocs { pendingDocIDs[id] = m.expectedDocs[id] diff --git a/src/m3ninx/search/search_mock.go b/src/m3ninx/search/search_mock.go index 3f2e886417..b070619f5f 100644 --- a/src/m3ninx/search/search_mock.go +++ b/src/m3ninx/search/search_mock.go @@ -59,10 +59,10 @@ func (m *MockExecutor) EXPECT() *MockExecutorMockRecorder { } // Execute mocks base method -func (m *MockExecutor) Execute(q Query) (doc.Iterator, error) { +func (m *MockExecutor) Execute(q Query) (doc.MetadataIterator, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Execute", q) - ret0, _ := ret[0].(doc.Iterator) + ret0, _ := ret[0].(doc.MetadataIterator) ret1, _ := ret[1].(error) return ret0, ret1 } diff --git a/src/m3ninx/search/types.go b/src/m3ninx/search/types.go index d4fc76b4ab..cb21903fda 100644 --- a/src/m3ninx/search/types.go +++ b/src/m3ninx/search/types.go @@ -32,7 +32,7 @@ import ( // Executor is responsible for executing queries over a snapshot. type Executor interface { // Execute executes a query over the Executor's snapshot. - Execute(q Query) (doc.Iterator, error) + Execute(q Query) (doc.MetadataIterator, error) // Close closes the iterator. Close() error From dbbee601b405dd21742718a5c31941e894c0995b Mon Sep 17 00:00:00 2001 From: Nate Broyles Date: Mon, 4 Jan 2021 18:03:54 -0500 Subject: [PATCH 3/3] Update index.Readable interface to support retreiving new doc.Document --- .../storage/index/read_through_segment.go | 14 ++- src/m3ninx/doc/types.go | 20 +++ src/m3ninx/index/index_mock.go | 84 +++++++++++-- src/m3ninx/index/iterator.go | 79 ++++++++++++ src/m3ninx/index/iterator_test.go | 112 +++++++++++++++++ src/m3ninx/index/metadata_iterator.go | 6 +- src/m3ninx/index/metadata_iterator_test.go | 8 +- src/m3ninx/index/segment/builder/builder.go | 2 +- .../segment/builder/multi_segments_builder.go | 2 +- .../index/segment/fst/encoding/docs/slice.go | 6 +- src/m3ninx/index/segment/fst/segment.go | 117 ++++++++++++++---- .../index/segment/fst/writer_reader_test.go | 10 +- src/m3ninx/index/segment/mem/reader.go | 47 +++++-- src/m3ninx/index/segment/mem/reader_test.go | 2 +- src/m3ninx/index/segment/mem/segment_test.go | 10 +- src/m3ninx/index/segment/segment_mock.go | 40 +++++- src/m3ninx/index/types.go | 17 ++- src/m3ninx/search/executor/iterator.go | 2 +- 18 files changed, 505 insertions(+), 73 deletions(-) create mode 100644 src/m3ninx/index/iterator.go create mode 100644 src/m3ninx/index/iterator_test.go diff --git a/src/dbnode/storage/index/read_through_segment.go b/src/dbnode/storage/index/read_through_segment.go index 39d6c501ef..089fd546bc 100644 --- a/src/dbnode/storage/index/read_through_segment.go +++ b/src/dbnode/storage/index/read_through_segment.go @@ -262,13 +262,23 @@ func (s *readThroughSegmentReader) AllDocs() (index.IDDocIterator, error) { return s.reader.AllDocs() } +// Metadata is a pass through call, since there's no postings list to cache. +func (s *readThroughSegmentReader) Metadata(id postings.ID) (doc.Metadata, error) { + return s.reader.Metadata(id) +} + +// MetadataIterator is a pass through call, since there's no postings list to cache. +func (s *readThroughSegmentReader) MetadataIterator(pl postings.List) (doc.MetadataIterator, error) { + return s.reader.MetadataIterator(pl) +} + // Doc is a pass through call, since there's no postings list to cache. -func (s *readThroughSegmentReader) Doc(id postings.ID) (doc.Metadata, error) { +func (s *readThroughSegmentReader) Doc(id postings.ID) (doc.Document, error) { return s.reader.Doc(id) } // Docs is a pass through call, since there's no postings list to cache. -func (s *readThroughSegmentReader) Docs(pl postings.List) (doc.MetadataIterator, error) { +func (s *readThroughSegmentReader) Docs(pl postings.List) (doc.Iterator, error) { return s.reader.Docs(pl) } diff --git a/src/m3ninx/doc/types.go b/src/m3ninx/doc/types.go index 451daf84d4..1eb41a0992 100644 --- a/src/m3ninx/doc/types.go +++ b/src/m3ninx/doc/types.go @@ -39,3 +39,23 @@ type MetadataIterator interface { // Close releases any internal resources used by the iterator. Close() error } + +// Iterator provides an iterator over a collection of documents. It is NOT +// safe for multiple goroutines to invoke methods on an Iterator simultaneously. +type Iterator interface { + // Next returns a bool indicating if the iterator has any more documents + // to return. + Next() bool + + // Current returns the current document. It is only safe to call Current immediately + // after a call to Next confirms there are more elements remaining. The Document + // returned from Current is only valid until the following call to Next(). Callers + // should copy the Document if they need it live longer. + Current() Document + + // Err returns any errors encountered during iteration. + Err() error + + // Close releases any internal resources used by the iterator. + Close() error +} diff --git a/src/m3ninx/index/index_mock.go b/src/m3ninx/index/index_mock.go index eaae0e8ee9..d7899331dd 100644 --- a/src/m3ninx/index/index_mock.go +++ b/src/m3ninx/index/index_mock.go @@ -1,7 +1,7 @@ // Code generated by MockGen. DO NOT EDIT. -// Source: github.com/m3db/m3/src/m3ninx/index (interfaces: Reader,DocRetriever) +// Source: github.com/m3db/m3/src/m3ninx/index (interfaces: Reader,DocRetriever,MetadataRetriever) -// Copyright (c) 2019 Uber Technologies, Inc. +// Copyright (c) 2021 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -86,10 +86,10 @@ func (mr *MockReaderMockRecorder) Close() *gomock.Call { } // Doc mocks base method -func (m *MockReader) Doc(arg0 postings.ID) (doc.Metadata, error) { +func (m *MockReader) Doc(arg0 postings.ID) (doc.Document, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Doc", arg0) - ret0, _ := ret[0].(doc.Metadata) + ret0, _ := ret[0].(doc.Document) ret1, _ := ret[1].(error) return ret0, ret1 } @@ -101,10 +101,10 @@ func (mr *MockReaderMockRecorder) Doc(arg0 interface{}) *gomock.Call { } // Docs mocks base method -func (m *MockReader) Docs(arg0 postings.List) (doc.MetadataIterator, error) { +func (m *MockReader) Docs(arg0 postings.List) (doc.Iterator, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Docs", arg0) - ret0, _ := ret[0].(doc.MetadataIterator) + ret0, _ := ret[0].(doc.Iterator) ret1, _ := ret[1].(error) return ret0, ret1 } @@ -175,6 +175,36 @@ func (mr *MockReaderMockRecorder) MatchTerm(arg0, arg1 interface{}) *gomock.Call return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MatchTerm", reflect.TypeOf((*MockReader)(nil).MatchTerm), arg0, arg1) } +// Metadata mocks base method +func (m *MockReader) Metadata(arg0 postings.ID) (doc.Metadata, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Metadata", arg0) + ret0, _ := ret[0].(doc.Metadata) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Metadata indicates an expected call of Metadata +func (mr *MockReaderMockRecorder) Metadata(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Metadata", reflect.TypeOf((*MockReader)(nil).Metadata), arg0) +} + +// MetadataIterator mocks base method +func (m *MockReader) MetadataIterator(arg0 postings.List) (doc.MetadataIterator, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "MetadataIterator", arg0) + ret0, _ := ret[0].(doc.MetadataIterator) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// MetadataIterator indicates an expected call of MetadataIterator +func (mr *MockReaderMockRecorder) MetadataIterator(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MetadataIterator", reflect.TypeOf((*MockReader)(nil).MetadataIterator), arg0) +} + // MockDocRetriever is a mock of DocRetriever interface type MockDocRetriever struct { ctrl *gomock.Controller @@ -199,10 +229,10 @@ func (m *MockDocRetriever) EXPECT() *MockDocRetrieverMockRecorder { } // Doc mocks base method -func (m *MockDocRetriever) Doc(arg0 postings.ID) (doc.Metadata, error) { +func (m *MockDocRetriever) Doc(arg0 postings.ID) (doc.Document, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Doc", arg0) - ret0, _ := ret[0].(doc.Metadata) + ret0, _ := ret[0].(doc.Document) ret1, _ := ret[1].(error) return ret0, ret1 } @@ -212,3 +242,41 @@ func (mr *MockDocRetrieverMockRecorder) Doc(arg0 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Doc", reflect.TypeOf((*MockDocRetriever)(nil).Doc), arg0) } + +// MockMetadataRetriever is a mock of MetadataRetriever interface +type MockMetadataRetriever struct { + ctrl *gomock.Controller + recorder *MockMetadataRetrieverMockRecorder +} + +// MockMetadataRetrieverMockRecorder is the mock recorder for MockMetadataRetriever +type MockMetadataRetrieverMockRecorder struct { + mock *MockMetadataRetriever +} + +// NewMockMetadataRetriever creates a new mock instance +func NewMockMetadataRetriever(ctrl *gomock.Controller) *MockMetadataRetriever { + mock := &MockMetadataRetriever{ctrl: ctrl} + mock.recorder = &MockMetadataRetrieverMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use +func (m *MockMetadataRetriever) EXPECT() *MockMetadataRetrieverMockRecorder { + return m.recorder +} + +// Metadata mocks base method +func (m *MockMetadataRetriever) Metadata(arg0 postings.ID) (doc.Metadata, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Metadata", arg0) + ret0, _ := ret[0].(doc.Metadata) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Metadata indicates an expected call of Metadata +func (mr *MockMetadataRetrieverMockRecorder) Metadata(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Metadata", reflect.TypeOf((*MockMetadataRetriever)(nil).Metadata), arg0) +} diff --git a/src/m3ninx/index/iterator.go b/src/m3ninx/index/iterator.go new file mode 100644 index 0000000000..b41bdf6735 --- /dev/null +++ b/src/m3ninx/index/iterator.go @@ -0,0 +1,79 @@ +// Copyright (c) 2021 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package index + +import ( + "github.com/m3db/m3/src/m3ninx/doc" + "github.com/m3db/m3/src/m3ninx/postings" +) + +type documentIterator struct { + retriever DocRetriever + postingsIter postings.Iterator + + currDoc doc.Document + currID postings.ID + closed bool + err error +} + +// NewIterator returns a new Iterator +func NewIterator(r DocRetriever, pi postings.Iterator) doc.Iterator { + return &documentIterator{ + retriever: r, + postingsIter: pi, + } +} + +func (e *documentIterator) Next() bool { + if e.closed || e.err != nil || !e.postingsIter.Next() { + return false + } + id := e.postingsIter.Current() + e.currID = id + + d, err := e.retriever.Doc(id) + if err != nil { + e.err = err + return false + } + e.currDoc = d + return true +} + +func (e *documentIterator) Current() doc.Document { + return e.currDoc +} + +func (e *documentIterator) Err() error { + return e.err +} + +func (e *documentIterator) Close() error { + if e.closed { + return errIteratorClosed + } + e.closed = true + e.currDoc = doc.Document{} + e.currID = postings.ID(0) + err := e.postingsIter.Close() + return err +} diff --git a/src/m3ninx/index/iterator_test.go b/src/m3ninx/index/iterator_test.go new file mode 100644 index 0000000000..8083ad04b3 --- /dev/null +++ b/src/m3ninx/index/iterator_test.go @@ -0,0 +1,112 @@ +// Copyright (c) 2021 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package index + +import ( + "testing" + + "github.com/m3db/m3/src/m3ninx/doc" + "github.com/m3db/m3/src/m3ninx/index/segment/fst/encoding" + "github.com/m3db/m3/src/m3ninx/postings" + xtest "github.com/m3db/m3/src/x/test" + + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/require" +) + +func TestDocIterator(t *testing.T) { + mockCtrl := xtest.NewController(t) + defer mockCtrl.Finish() + + docs := []doc.Metadata{ + { + ID: []byte("doc-id-1"), + Fields: []doc.Field{ + { + Name: []byte("apple"), + Value: []byte("red"), + }, + }, + }, + { + ID: []byte("doc-id-2"), + Fields: []doc.Field{ + { + Name: []byte("banana"), + Value: []byte("yellow"), + }, + }, + }, + } + + encodedDocsWithIds := make([]docsWithIDs, 0, len(docs)) + for i, d := range docs { + encodedDocsWithIds = append(encodedDocsWithIds, docsWithIDs{ + id: postings.ID(i), + doc: doc.NewDocumentFromEncoded( + doc.Encoded{Bytes: docToBytes(d)}), + }) + } + + retriever := NewMockDocRetriever(mockCtrl) + gomock.InOrder( + retriever.EXPECT().Doc(encodedDocsWithIds[0].id).Return(encodedDocsWithIds[0].doc, nil), + retriever.EXPECT().Doc(encodedDocsWithIds[1].id).Return(encodedDocsWithIds[1].doc, nil), + ) + + postingsIter := postings.NewMockIterator(mockCtrl) + gomock.InOrder( + postingsIter.EXPECT().Next().Return(true), + postingsIter.EXPECT().Current().Return(encodedDocsWithIds[0].id), + postingsIter.EXPECT().Next().Return(true), + postingsIter.EXPECT().Current().Return(encodedDocsWithIds[1].id), + postingsIter.EXPECT().Next().Return(false), + postingsIter.EXPECT().Close().Return(nil), + ) + + it := NewIterator(retriever, postingsIter) + + require.True(t, it.Next()) + require.Equal(t, encodedDocsWithIds[0].doc, it.Current()) + require.True(t, it.Next()) + require.Equal(t, encodedDocsWithIds[1].doc, it.Current()) + require.False(t, it.Next()) + require.NoError(t, it.Err()) + + require.NoError(t, it.Close()) +} + +type docsWithIDs struct { + id postings.ID + doc doc.Document +} + +func docToBytes(d doc.Metadata) []byte { + enc := encoding.NewEncoder(1024) + n := enc.PutBytes(d.ID) + n += enc.PutUvarint(uint64(len(d.Fields))) + for _, f := range d.Fields { // nolint:gocritic + n += enc.PutBytes(f.Name) + n += enc.PutBytes(f.Value) + } + + return enc.Bytes() +} diff --git a/src/m3ninx/index/metadata_iterator.go b/src/m3ninx/index/metadata_iterator.go index aa464e3cf2..afe53d9ecd 100644 --- a/src/m3ninx/index/metadata_iterator.go +++ b/src/m3ninx/index/metadata_iterator.go @@ -30,7 +30,7 @@ import ( var errIteratorClosed = errors.New("iterator has been closed") type idDocIterator struct { - retriever DocRetriever + retriever MetadataRetriever postingsIter postings.Iterator currDoc doc.Metadata @@ -40,7 +40,7 @@ type idDocIterator struct { } // NewIDDocIterator returns a new NewIDDocIterator. -func NewIDDocIterator(r DocRetriever, pi postings.Iterator) IDDocIterator { +func NewIDDocIterator(r MetadataRetriever, pi postings.Iterator) IDDocIterator { return &idDocIterator{ retriever: r, postingsIter: pi, @@ -54,7 +54,7 @@ func (it *idDocIterator) Next() bool { id := it.postingsIter.Current() it.currID = id - d, err := it.retriever.Doc(id) + d, err := it.retriever.Metadata(id) if err != nil { it.err = err return false diff --git a/src/m3ninx/index/metadata_iterator_test.go b/src/m3ninx/index/metadata_iterator_test.go index 0cfe820f39..2e17ebe252 100644 --- a/src/m3ninx/index/metadata_iterator_test.go +++ b/src/m3ninx/index/metadata_iterator_test.go @@ -73,11 +73,11 @@ func TestIterator(t *testing.T) { }, } - retriever := NewMockDocRetriever(mockCtrl) + retriever := NewMockMetadataRetriever(mockCtrl) gomock.InOrder( - retriever.EXPECT().Doc(docWithIds[0].id).Return(docWithIds[0].doc, nil), - retriever.EXPECT().Doc(docWithIds[1].id).Return(docWithIds[1].doc, nil), - retriever.EXPECT().Doc(docWithIds[2].id).Return(docWithIds[2].doc, nil), + retriever.EXPECT().Metadata(docWithIds[0].id).Return(docWithIds[0].doc, nil), + retriever.EXPECT().Metadata(docWithIds[1].id).Return(docWithIds[1].doc, nil), + retriever.EXPECT().Metadata(docWithIds[2].id).Return(docWithIds[2].doc, nil), ) postingsIter := postings.NewMockIterator(mockCtrl) diff --git a/src/m3ninx/index/segment/builder/builder.go b/src/m3ninx/index/segment/builder/builder.go index 4e2576b6f0..6217d8e820 100644 --- a/src/m3ninx/index/segment/builder/builder.go +++ b/src/m3ninx/index/segment/builder/builder.go @@ -485,7 +485,7 @@ func (b *builder) AllDocs() (index.IDDocIterator, error) { return index.NewIDDocIterator(b, rangeIter), nil } -func (b *builder) Doc(id postings.ID) (doc.Metadata, error) { +func (b *builder) Metadata(id postings.ID) (doc.Metadata, error) { b.status.RLock() defer b.status.RUnlock() diff --git a/src/m3ninx/index/segment/builder/multi_segments_builder.go b/src/m3ninx/index/segment/builder/multi_segments_builder.go index 4c156e5127..7c8822b711 100644 --- a/src/m3ninx/index/segment/builder/multi_segments_builder.go +++ b/src/m3ninx/index/segment/builder/multi_segments_builder.go @@ -159,7 +159,7 @@ func (b *builderFromSegments) AllDocs() (index.IDDocIterator, error) { return index.NewIDDocIterator(b, rangeIter), nil } -func (b *builderFromSegments) Doc(id postings.ID) (doc.Metadata, error) { +func (b *builderFromSegments) Metadata(id postings.ID) (doc.Metadata, error) { idx := int(id) if idx < 0 || idx >= len(b.docs) { return doc.Metadata{}, errDocNotFound diff --git a/src/m3ninx/index/segment/fst/encoding/docs/slice.go b/src/m3ninx/index/segment/fst/encoding/docs/slice.go index 20df8c8a5e..02111d6fa1 100644 --- a/src/m3ninx/index/segment/fst/encoding/docs/slice.go +++ b/src/m3ninx/index/segment/fst/encoding/docs/slice.go @@ -33,7 +33,7 @@ var ( ) var _ Reader = (*SliceReader)(nil) -var _ index.DocRetriever = (*SliceReader)(nil) +var _ index.MetadataRetriever = (*SliceReader)(nil) // SliceReader is a docs slice reader for use with documents // stored in memory. @@ -61,8 +61,8 @@ func (r *SliceReader) Read(id postings.ID) (doc.Metadata, error) { return r.docs[idx], nil } -// Doc implements DocRetriever and reads the document with postings ID. -func (r *SliceReader) Doc(id postings.ID) (doc.Metadata, error) { +// Metadata implements MetadataRetriever and reads the document with postings ID. +func (r *SliceReader) Metadata(id postings.ID) (doc.Metadata, error) { return r.Read(id) } diff --git a/src/m3ninx/index/segment/fst/segment.go b/src/m3ninx/index/segment/fst/segment.go index 825753fe89..fd041df720 100644 --- a/src/m3ninx/index/segment/fst/segment.go +++ b/src/m3ninx/index/segment/fst/segment.go @@ -131,9 +131,10 @@ func NewSegment(data SegmentData, opts Options) (Segment, error) { } var ( - docsThirdPartyReader = data.DocsReader - docsDataReader *docs.DataReader - docsIndexReader *docs.IndexReader + docsThirdPartyReader = data.DocsReader + docsDataReader *docs.DataReader + docsEncodedDataReader *docs.EncodedDataReader + docsIndexReader *docs.IndexReader ) if docsThirdPartyReader == nil { docsDataReader = docs.NewDataReader(data.DocsData.Bytes) @@ -142,12 +143,14 @@ func NewSegment(data SegmentData, opts Options) (Segment, error) { return nil, fmt.Errorf("unable to load documents index: %v", err) } } + docsEncodedDataReader = docs.NewEncodedDataReader(data.DocsData.Bytes) s := &fsSegment{ - fieldsFST: fieldsFST, - docsDataReader: docsDataReader, - docsIndexReader: docsIndexReader, - docsThirdPartyReader: docsThirdPartyReader, + fieldsFST: fieldsFST, + docsDataReader: docsDataReader, + docsEncodedDataReader: docsEncodedDataReader, + docsIndexReader: docsIndexReader, + docsThirdPartyReader: docsThirdPartyReader, data: data, opts: opts, @@ -169,15 +172,16 @@ var _ segment.ImmutableSegment = (*fsSegment)(nil) type fsSegment struct { sync.RWMutex - ctx context.Context - closed bool - finalized bool - fieldsFST *vellum.FST - docsDataReader *docs.DataReader - docsIndexReader *docs.IndexReader - docsThirdPartyReader docs.Reader - data SegmentData - opts Options + ctx context.Context + closed bool + finalized bool + fieldsFST *vellum.FST + docsDataReader *docs.DataReader + docsEncodedDataReader *docs.EncodedDataReader + docsIndexReader *docs.IndexReader + docsThirdPartyReader docs.Reader + data SegmentData + opts Options numDocs int64 } @@ -579,7 +583,7 @@ func (r *fsSegment) matchAllNotClosedMaybeFinalizedWithRLock() (postings.Mutable return pl, nil } -func (r *fsSegment) docNotClosedMaybeFinalizedWithRLock(id postings.ID) (doc.Metadata, error) { +func (r *fsSegment) metadataNotClosedMaybeFinalizedWithRLock(id postings.ID) (doc.Metadata, error) { // NB(r): Not closed, but could be finalized (i.e. closed segment reader) // calling match field after this segment is finalized. if r.finalized { @@ -599,8 +603,8 @@ func (r *fsSegment) docNotClosedMaybeFinalizedWithRLock(id postings.ID) (doc.Met return r.docsDataReader.Read(offset) } -func (r *fsSegment) docsNotClosedMaybeFinalizedWithRLock( - retriever index.DocRetriever, +func (r *fsSegment) metadataIteratorNotClosedMaybeFinalizedWithRLock( + retriever index.MetadataRetriever, pl postings.List, ) (doc.MetadataIterator, error) { // NB(r): Not closed, but could be finalized (i.e. closed segment reader) @@ -612,8 +616,51 @@ func (r *fsSegment) docsNotClosedMaybeFinalizedWithRLock( return index.NewIDDocIterator(retriever, pl.Iterator()), nil } -func (r *fsSegment) allDocsNotClosedMaybeFinalizedWithRLock( +func (r *fsSegment) docNotClosedMaybeFinalizedWithRLock(id postings.ID) (doc.Document, error) { + // NB(r): Not closed, but could be finalized (i.e. closed segment reader) + // calling match field after this segment is finalized. + if r.finalized { + return doc.Document{}, errReaderFinalized + } + + // If using docs slice reader, return from the in memory slice reader + if r.docsThirdPartyReader != nil { + m, err := r.docsThirdPartyReader.Read(id) + if err != nil { + return doc.Document{}, err + } + + return doc.NewDocumentFromMetadata(m), nil + } + + offset, err := r.docsIndexReader.Read(id) + if err != nil { + return doc.Document{}, err + } + + e, err := r.docsEncodedDataReader.Read(offset) + if err != nil { + return doc.Document{}, err + } + + return doc.NewDocumentFromEncoded(e), nil +} + +func (r *fsSegment) docsNotClosedMaybeFinalizedWithRLock( retriever index.DocRetriever, + pl postings.List, +) (doc.Iterator, error) { + // NB(r): Not closed, but could be finalized (i.e. closed segment reader) + // calling match field after this segment is finalized. + if r.finalized { + return nil, errReaderFinalized + } + + return index.NewIterator(retriever, pl.Iterator()), nil +} + +func (r *fsSegment) allDocsNotClosedMaybeFinalizedWithRLock( + retriever index.MetadataRetriever, ) (index.IDDocIterator, error) { // NB(r): Not closed, but could be finalized (i.e. closed segment reader) // calling match field after this segment is finalized. @@ -896,19 +943,45 @@ func (sr *fsSegmentReader) MatchAll() (postings.MutableList, error) { return pl, err } -func (sr *fsSegmentReader) Doc(id postings.ID) (doc.Metadata, error) { +func (sr *fsSegmentReader) Metadata(id postings.ID) (doc.Metadata, error) { if sr.closed { return doc.Metadata{}, errReaderClosed } // NB(r): We are allowed to call match field after Close called on // the segment but not after it is finalized. sr.fsSegment.RLock() + pl, err := sr.fsSegment.metadataNotClosedMaybeFinalizedWithRLock(id) + sr.fsSegment.RUnlock() + return pl, err +} + +func (sr *fsSegmentReader) MetadataIterator(pl postings.List) (doc.MetadataIterator, error) { + if sr.closed { + return nil, errReaderClosed + } + // NB(r): We are allowed to call match field after Close called on + // the segment but not after it is finalized. + // Also make sure the doc retriever is the reader not the segment so that + // is closed check is not performed and only the is finalized check. + sr.fsSegment.RLock() + iter, err := sr.fsSegment.metadataIteratorNotClosedMaybeFinalizedWithRLock(sr, pl) + sr.fsSegment.RUnlock() + return iter, err +} + +func (sr *fsSegmentReader) Doc(id postings.ID) (doc.Document, error) { + if sr.closed { + return doc.Document{}, errReaderClosed + } + // NB(r): We are allowed to call match field after Close called on + // the segment but not after it is finalized. + sr.fsSegment.RLock() pl, err := sr.fsSegment.docNotClosedMaybeFinalizedWithRLock(id) sr.fsSegment.RUnlock() return pl, err } -func (sr *fsSegmentReader) Docs(pl postings.List) (doc.MetadataIterator, error) { +func (sr *fsSegmentReader) Docs(pl postings.List) (doc.Iterator, error) { if sr.closed { return nil, errReaderClosed } diff --git a/src/m3ninx/index/segment/fst/writer_reader_test.go b/src/m3ninx/index/segment/fst/writer_reader_test.go index c4a6198a15..eef2fbdf5f 100644 --- a/src/m3ninx/index/segment/fst/writer_reader_test.go +++ b/src/m3ninx/index/segment/fst/writer_reader_test.go @@ -420,9 +420,9 @@ func TestSegmentDocs(t *testing.T) { obsPl, err := obsReader.MatchTerm(f, []byte(term)) require.NoError(t, err) - expDocs, err := expReader.Docs(expPl) + expDocs, err := expReader.MetadataIterator(expPl) require.NoError(t, err) - obsDocs, err := obsReader.Docs(obsPl) + obsDocs, err := obsReader.MetadataIterator(obsPl) require.NoError(t, err) assertDocsEqual(t, expDocs, obsDocs) @@ -529,10 +529,10 @@ func TestSegmentReaderValidUntilClose(t *testing.T) { require.NoError(t, err) assertPostingsList(t, list, []postings.ID{0, 1, 2}) - _, err = reader.Doc(0) + _, err = reader.Metadata(0) require.NoError(t, err) - _, err = reader.Docs(list) + _, err = reader.MetadataIterator(list) require.NoError(t, err) _, err = reader.AllDocs() @@ -543,7 +543,7 @@ func TestSegmentReaderValidUntilClose(t *testing.T) { require.NoError(t, err) list, err = reader.MatchRegexp([]byte("fruit"), re) require.NoError(t, err) - iter, err := reader.Docs(list) + iter, err := reader.MetadataIterator(list) require.NoError(t, err) var docs int for iter.Next() { diff --git a/src/m3ninx/index/segment/mem/reader.go b/src/m3ninx/index/segment/mem/reader.go index b3c5a2cdde..998a184452 100644 --- a/src/m3ninx/index/segment/mem/reader.go +++ b/src/m3ninx/index/segment/mem/reader.go @@ -124,28 +124,45 @@ func (r *reader) MatchAll() (postings.MutableList, error) { return pl, nil } -func (r *reader) Doc(id postings.ID) (doc.Metadata, error) { +func (r *reader) Metadata(id postings.ID) (doc.Metadata, error) { + r.RLock() + defer r.RUnlock() + + return r.getMetadataWithRLock(id) +} + +func (r *reader) MetadataIterator(pl postings.List) (doc.MetadataIterator, error) { r.RLock() defer r.RUnlock() if r.closed { - return doc.Metadata{}, errSegmentReaderClosed + return nil, errSegmentReaderClosed } + boundedIter := newBoundedPostingsIterator(pl.Iterator(), r.limits) + return r.getMetadataIterWithLock(boundedIter), nil +} - if id < r.limits.startInclusive || id >= r.limits.endExclusive { - return doc.Metadata{}, index.ErrDocNotFound +func (r *reader) Doc(id postings.ID) (doc.Document, error) { + r.RLock() + defer r.RUnlock() + + m, err := r.getMetadataWithRLock(id) + if err != nil { + return doc.Document{}, err } - return r.segment.getDoc(id) + return doc.NewDocumentFromMetadata(m), nil } -func (r *reader) Docs(pl postings.List) (doc.MetadataIterator, error) { +func (r *reader) Docs(pl postings.List) (doc.Iterator, error) { r.RLock() defer r.RUnlock() + if r.closed { return nil, errSegmentReaderClosed } + boundedIter := newBoundedPostingsIterator(pl.Iterator(), r.limits) - return r.getDocIterWithLock(boundedIter), nil + return index.NewIterator(r, boundedIter), nil } func (r *reader) AllDocs() (index.IDDocIterator, error) { @@ -156,13 +173,25 @@ func (r *reader) AllDocs() (index.IDDocIterator, error) { } pi := postings.NewRangeIterator(r.limits.startInclusive, r.limits.endExclusive) - return r.getDocIterWithLock(pi), nil + return r.getMetadataIterWithLock(pi), nil } -func (r *reader) getDocIterWithLock(iter postings.Iterator) index.IDDocIterator { +func (r *reader) getMetadataIterWithLock(iter postings.Iterator) index.IDDocIterator { return index.NewIDDocIterator(r, iter) } +func (r *reader) getMetadataWithRLock(id postings.ID) (doc.Metadata, error) { + if r.closed { + return doc.Metadata{}, errSegmentReaderClosed + } + + if id < r.limits.startInclusive || id >= r.limits.endExclusive { + return doc.Metadata{}, index.ErrDocNotFound + } + + return r.segment.getDoc(id) +} + func (r *reader) Close() error { r.Lock() if r.closed { diff --git a/src/m3ninx/index/segment/mem/reader_test.go b/src/m3ninx/index/segment/mem/reader_test.go index cc3ab7e9b8..f82bfef96f 100644 --- a/src/m3ninx/index/segment/mem/reader_test.go +++ b/src/m3ninx/index/segment/mem/reader_test.go @@ -147,7 +147,7 @@ func TestReaderDocs(t *testing.T) { reader := newReader(segment, readerDocRange{0, maxID}, postings.NewPool(nil, roaring.NewPostingsList)) - iter, err := reader.Docs(postingsList) + iter, err := reader.MetadataIterator(postingsList) require.NoError(t, err) actualDocs := make([]doc.Metadata, 0, len(docs)) diff --git a/src/m3ninx/index/segment/mem/segment_test.go b/src/m3ninx/index/segment/mem/segment_test.go index 3ec0aebb00..7c2379b823 100644 --- a/src/m3ninx/index/segment/mem/segment_test.go +++ b/src/m3ninx/index/segment/mem/segment_test.go @@ -127,7 +127,7 @@ func TestSegmentInsert(t *testing.T) { pl, err := r.MatchTerm(doc.IDReservedFieldName, id) require.NoError(t, err) - iter, err := r.Docs(pl) + iter, err := r.MetadataIterator(pl) require.NoError(t, err) require.True(t, iter.Next()) @@ -186,7 +186,7 @@ func TestSegmentInsertDuplicateID(t *testing.T) { pl, err := r.MatchTerm(doc.IDReservedFieldName, id) require.NoError(t, err) - iter, err := r.Docs(pl) + iter, err := r.MetadataIterator(pl) require.NoError(t, err) require.True(t, iter.Next()) @@ -714,7 +714,7 @@ func TestSegmentReaderMatchExact(t *testing.T) { pl, err := r.MatchTerm([]byte("fruit"), []byte("apple")) require.NoError(t, err) - iter, err := r.Docs(pl) + iter, err := r.MetadataIterator(pl) require.NoError(t, err) actualDocs := make([]doc.Metadata, 0) @@ -845,7 +845,7 @@ func TestSegmentReaderMatchRegex(t *testing.T) { pl, err := r.MatchRegexp(field, index.CompiledRegex{Simple: compiled}) require.NoError(t, err) - iter, err := r.Docs(pl) + iter, err := r.MetadataIterator(pl) require.NoError(t, err) actualDocs := make([]doc.Metadata, 0) @@ -872,7 +872,7 @@ func testDocument(t *testing.T, d doc.Metadata, r index.Reader) { pl, err := r.MatchTerm(name, value) require.NoError(t, err) - iter, err := r.Docs(pl) + iter, err := r.MetadataIterator(pl) require.NoError(t, err) require.True(t, iter.Next()) diff --git a/src/m3ninx/index/segment/segment_mock.go b/src/m3ninx/index/segment/segment_mock.go index e5610b598d..7c86c01359 100644 --- a/src/m3ninx/index/segment/segment_mock.go +++ b/src/m3ninx/index/segment/segment_mock.go @@ -1,7 +1,7 @@ // Code generated by MockGen. DO NOT EDIT. // Source: github.com/m3db/m3/src/m3ninx/index/segment/types.go -// Copyright (c) 2020 Uber Technologies, Inc. +// Copyright (c) 2021 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -181,11 +181,26 @@ func (m *MockReader) EXPECT() *MockReaderMockRecorder { return m.recorder } +// Metadata mocks base method +func (m *MockReader) Metadata(id postings.ID) (doc.Metadata, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Metadata", id) + ret0, _ := ret[0].(doc.Metadata) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Metadata indicates an expected call of Metadata +func (mr *MockReaderMockRecorder) Metadata(id interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Metadata", reflect.TypeOf((*MockReader)(nil).Metadata), id) +} + // Doc mocks base method -func (m *MockReader) Doc(id postings.ID) (doc.Metadata, error) { +func (m *MockReader) Doc(id postings.ID) (doc.Document, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Doc", id) - ret0, _ := ret[0].(doc.Metadata) + ret0, _ := ret[0].(doc.Document) ret1, _ := ret[1].(error) return ret0, ret1 } @@ -256,11 +271,26 @@ func (mr *MockReaderMockRecorder) MatchAll() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MatchAll", reflect.TypeOf((*MockReader)(nil).MatchAll)) } +// MetadataIterator mocks base method +func (m *MockReader) MetadataIterator(pl postings.List) (doc.MetadataIterator, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "MetadataIterator", pl) + ret0, _ := ret[0].(doc.MetadataIterator) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// MetadataIterator indicates an expected call of MetadataIterator +func (mr *MockReaderMockRecorder) MetadataIterator(pl interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "MetadataIterator", reflect.TypeOf((*MockReader)(nil).MetadataIterator), pl) +} + // Docs mocks base method -func (m *MockReader) Docs(pl postings.List) (doc.MetadataIterator, error) { +func (m *MockReader) Docs(pl postings.List) (doc.Iterator, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "Docs", pl) - ret0, _ := ret[0].(doc.MetadataIterator) + ret0, _ := ret[0].(doc.Iterator) ret1, _ := ret[1].(error) return ret0, ret1 } diff --git a/src/m3ninx/index/types.go b/src/m3ninx/index/types.go index ac9f80119a..1a005691aa 100644 --- a/src/m3ninx/index/types.go +++ b/src/m3ninx/index/types.go @@ -61,6 +61,7 @@ type Writer interface { // Readable provides a point-in-time accessor to the documents in an index. type Readable interface { + MetadataRetriever DocRetriever // MatchField returns a postings list over all documents which match the given field. @@ -76,9 +77,13 @@ type Readable interface { // MatchAll returns a postings list for all documents known to the Reader. MatchAll() (postings.MutableList, error) - // Docs returns an iterator over the documents whose IDs are in the provided + // MetadataIterator returns an iterator over the metadata whose IDs are in the provided // postings list. - Docs(pl postings.List) (doc.MetadataIterator, error) + MetadataIterator(pl postings.List) (doc.MetadataIterator, error) + + // Docs returns an iterator over the document whose IDs + // are in the provided postings list. + Docs(pl postings.List) (doc.Iterator, error) // AllDocs returns an iterator over the documents known to the Reader. AllDocs() (IDDocIterator, error) @@ -94,10 +99,16 @@ type CompiledRegex struct { PrefixEnd []byte } +// MetadataRetriever returns the metadata associated with a postings ID. It returns +// ErrDocNotFound if there is no metadata corresponding to the given postings ID. +type MetadataRetriever interface { + Metadata(id postings.ID) (doc.Metadata, error) +} + // DocRetriever returns the document associated with a postings ID. It returns // ErrDocNotFound if there is no document corresponding to the given postings ID. type DocRetriever interface { - Doc(id postings.ID) (doc.Metadata, error) + Doc(id postings.ID) (doc.Document, error) } // IDDocIterator is an extented documents Iterator which can also return the postings diff --git a/src/m3ninx/search/executor/iterator.go b/src/m3ninx/search/executor/iterator.go index 51d65b0614..c1cc471ef9 100644 --- a/src/m3ninx/search/executor/iterator.go +++ b/src/m3ninx/search/executor/iterator.go @@ -122,7 +122,7 @@ func (it *iterator) nextIter() (doc.MetadataIterator, bool, error) { return nil, false, err } - iter, err := reader.Docs(pl) + iter, err := reader.MetadataIterator(pl) if err != nil { return nil, false, err }