Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[dbnode] Direct conversion of encoded tags to doc.Metadata #3087

Merged
merged 25 commits into from
Jan 20, 2021
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ac80c69
add benchmark
vpranckaitis Jan 11, 2021
f0370d8
speed up the search of tag bytes in series ID
vpranckaitis Jan 11, 2021
5e77e4a
update tests
vpranckaitis Jan 11, 2021
fe8ee23
PR comments
vpranckaitis Jan 12, 2021
4221b84
add tests that check if bytes tag slices reuse data from series ID
vpranckaitis Jan 12, 2021
4927f6f
update benchmark
vpranckaitis Jan 12, 2021
6bb6d9b
PR comments
vpranckaitis Jan 13, 2021
6d007e3
un-base64 series ID samples
vpranckaitis Jan 13, 2021
9114f90
Merge branch 'master' into vilius/tags_to_doc_performace_improvement
linasm Jan 13, 2021
a879a2a
[dbnode] Direct conversion of encoded tags to doc.Metadata
vpranckaitis Jan 14, 2021
db2023e
extract decoding steps into smaller functions
vpranckaitis Jan 14, 2021
f7c6bcd
thinner encoded tag iterator and benchmarks
vpranckaitis Jan 15, 2021
f769ce0
merge DecodeTagName() and DecodeTagValue() methods
vpranckaitis Jan 15, 2021
d3a8c38
function that uses indexing instead of sliding slice
vpranckaitis Jan 18, 2021
c4296b6
benchmark TagValueFromEncodedTagsFast
vpranckaitis Jan 18, 2021
9b1ad47
revert decoder_fast.go changes
vpranckaitis Jan 19, 2021
ba77217
revert decoder_fast_iter.go changes
vpranckaitis Jan 19, 2021
3e949af
remove convert.FromSeriesIDAndEncodedTagsIndex()
vpranckaitis Jan 19, 2021
e0cad90
export and use variables from serialize package
vpranckaitis Jan 19, 2021
f77ccef
add tests
vpranckaitis Jan 19, 2021
45378ef
Apply suggestions from code review
vpranckaitis Jan 19, 2021
a5436e8
PR comments
vpranckaitis Jan 19, 2021
8b725f0
use ident.BytesID instead of ident.ID
vpranckaitis Jan 19, 2021
f4f96c0
Merge branch 'master' into vilius/convert_encoded_tags_to_doc
vpranckaitis Jan 20, 2021
d782a61
Merge branch 'master' into vilius/convert_encoded_tags_to_doc
vpranckaitis Jan 20, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 118 additions & 26 deletions src/dbnode/storage/index/convert/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,26 @@ import (
"fmt"
"unicode/utf8"

"github.com/m3db/m3/src/dbnode/ts"
"github.com/m3db/m3/src/m3ninx/doc"
"github.com/m3db/m3/src/query/graphite/graphite"
"github.com/m3db/m3/src/x/ident"
"github.com/m3db/m3/src/x/pool"
"github.com/m3db/m3/src/x/serialize"
)

const (
// NB: this assumes that series ID has a format:
// {tag1="value1",tag2="value2",...}
//
// Thus firstTagBytesPosition points to the 't' immediately after curly brace '{'.
// It is the initial position hint used when looking up tag bytes inside the series ID.
firstTagBytesPosition int = 1
// distanceBetweenTagNameAndValue corresponds to '="' in series ID that separates tag name from
// its value, i.e. the number of bytes to skip from the end of a name to the start of its value.
distanceBetweenTagNameAndValue int = 2
// distanceBetweenTagValueAndNextName corresponds to '",' in series ID that separates
// tag's value from the following tag name, i.e. the number of bytes to skip from the end of a
// value to the start of the next name.
distanceBetweenTagValueAndNextName int = 2
)

var (
Expand Down Expand Up @@ -108,22 +124,19 @@ func ValidateSeriesTag(tag ident.Tag) error {

// FromSeriesIDAndTags converts the provided series id+tags into a document.
func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) {
clonedID := clone(id)
fields := make([]doc.Field, 0, len(tags.Values()))
var (
clonedID = clone(id.Bytes())
fields = make([]doc.Field, 0, len(tags.Values()))
expectedStart = firstTagBytesPosition
)
for _, tag := range tags.Values() {
nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes()

var clonedName, clonedValue []byte
if idx := bytes.Index(clonedID, nameBytes); idx != -1 {
clonedName = clonedID[idx : idx+len(nameBytes)]
} else {
clonedName = append([]byte(nil), nameBytes...)
}
if idx := bytes.Index(clonedID, valueBytes); idx != -1 {
clonedValue = clonedID[idx : idx+len(valueBytes)]
} else {
clonedValue = append([]byte(nil), valueBytes...)
}
clonedName, expectedStart = findSliceOrClone(clonedID, nameBytes, expectedStart,
distanceBetweenTagNameAndValue)
clonedValue, expectedStart = findSliceOrClone(clonedID, valueBytes, expectedStart,
distanceBetweenTagValueAndNextName)

fields = append(fields, doc.Field{
Name: clonedName,
Expand All @@ -143,23 +156,20 @@ func FromSeriesIDAndTags(id ident.ID, tags ident.Tags) (doc.Metadata, error) {

// FromSeriesIDAndTagIter converts the provided series id+tags into a document.
func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata, error) {
clonedID := clone(id)
fields := make([]doc.Field, 0, tags.Remaining())
var (
clonedID = clone(id.Bytes())
fields = make([]doc.Field, 0, tags.Remaining())
expectedStart = firstTagBytesPosition
)
for tags.Next() {
tag := tags.Current()
nameBytes, valueBytes := tag.Name.Bytes(), tag.Value.Bytes()

var clonedName, clonedValue []byte
if idx := bytes.Index(clonedID, nameBytes); idx != -1 {
clonedName = clonedID[idx : idx+len(nameBytes)]
} else {
clonedName = append([]byte(nil), nameBytes...)
}
if idx := bytes.Index(clonedID, valueBytes); idx != -1 {
clonedValue = clonedID[idx : idx+len(valueBytes)]
} else {
clonedValue = append([]byte(nil), valueBytes...)
}
clonedName, expectedStart = findSliceOrClone(clonedID, nameBytes, expectedStart,
distanceBetweenTagNameAndValue)
clonedValue, expectedStart = findSliceOrClone(clonedID, valueBytes, expectedStart,
distanceBetweenTagValueAndNextName)

fields = append(fields, doc.Field{
Name: clonedName,
Expand All @@ -180,6 +190,89 @@ func FromSeriesIDAndTagIter(id ident.ID, tags ident.TagIterator) (doc.Metadata,
return d, nil
}

// FromSeriesIDAndEncodedTags converts the provided series id and encoded tags into a document.
vpranckaitis marked this conversation as resolved.
Show resolved Hide resolved
func FromSeriesIDAndEncodedTags(id ident.ID, encodedTags ts.EncodedTags) (doc.Metadata, error) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please change ident.ID to ident.BytesID. I've just realized that using an interface here will force an allocation on some of the code paths where we will be using this function.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

var (
byteOrder = serialize.ByteOrder
total = len(encodedTags)
)
if total < 4 {
return doc.Metadata{}, fmt.Errorf("encoded tags too short: size=%d, need=%d", total, 4)
}

header := byteOrder.Uint16(encodedTags[:2])
encodedTags = encodedTags[2:]
if header != serialize.HeaderMagicNumber {
return doc.Metadata{}, serialize.ErrIncorrectHeader
}

length := int(byteOrder.Uint16(encodedTags[:2]))
encodedTags = encodedTags[2:]

var (
clonedID = clone(id.Bytes())
fields = make([]doc.Field, 0, length)
expectedStart = firstTagBytesPosition
)

for i := 0; i < length; i++ {
if len(encodedTags) < 2 {
return doc.Metadata{}, fmt.Errorf("missing size for tag name: index=%d", i)
}
numBytesName := int(byteOrder.Uint16(encodedTags[:2]))
if numBytesName == 0 {
return doc.Metadata{}, serialize.ErrEmptyTagNameLiteral
}
encodedTags = encodedTags[2:]

bytesName := encodedTags[:numBytesName]
encodedTags = encodedTags[numBytesName:]

if len(encodedTags) < 2 {
return doc.Metadata{}, fmt.Errorf("missing size for tag value: index=%d", i)
}

numBytesValue := int(byteOrder.Uint16(encodedTags[:2]))
encodedTags = encodedTags[2:]

bytesValue := encodedTags[:numBytesValue]
encodedTags = encodedTags[numBytesValue:]

var clonedName, clonedValue []byte
clonedName, expectedStart = findSliceOrClone(clonedID, bytesName, expectedStart,
distanceBetweenTagNameAndValue)
clonedValue, expectedStart = findSliceOrClone(clonedID, bytesValue, expectedStart,
distanceBetweenTagValueAndNextName)

fields = append(fields, doc.Field{
Name: clonedName,
Value: clonedValue,
})
}

d := doc.Metadata{
ID: clonedID,
Fields: fields,
}
if err := Validate(d); err != nil {
return doc.Metadata{}, err
}
return d, nil
}

// findSliceOrClone returns a slice with the same contents as tag, preferring a sub-slice of id
// over a fresh allocation, plus a hint for where the next tag component is expected inside id.
//
// Lookup order:
//  1. id[expectedStart:expectedStart+len(tag)], when expectedStart is non-negative and the
//     range fits inside id;
//  2. the first occurrence of tag anywhere in id;
//  3. an independent copy of tag, in which case the returned hint is -1.
//
// When the tag is found in id (cases 1 and 2) the hint is the end offset of the returned
// sub-slice plus nextPositionDistance. The hint is always derived from the actual match
// position, so a single miss of the expected position does not invalidate subsequent hints.
func findSliceOrClone(id, tag []byte, expectedStart, nextPositionDistance int) ([]byte, int) { //nolint:unparam
	n := len(tag)

	// Fast path: the tag sits exactly where the caller predicted.
	if expectedStart >= 0 {
		if expectedEnd := expectedStart + n; expectedEnd <= len(id) &&
			bytes.Equal(id[expectedStart:expectedEnd], tag) {
			return id[expectedStart:expectedEnd], expectedEnd + nextPositionDistance
		}
	}

	// Slow path: scan the whole id, and re-anchor the hint on the position actually found.
	if idx := bytes.Index(id, tag); idx != -1 {
		end := idx + n
		return id[idx:end], end + nextPositionDistance
	}

	// The tag bytes are not part of the id at all: take a copy and signal "no hint".
	return clone(tag), -1
}

// TagsFromTagsIter returns an ident.Tags from a TagIterator. It also tries
// to re-use bytes from the seriesID if they're also present in the tags
// instead of re-allocating them. This requires that the ident.Tags that is
Expand Down Expand Up @@ -252,8 +345,7 @@ func TagsFromTagsIter(
// NB(prateek): we take an independent copy of the bytes underlying
// any ids provided, as we need to maintain the lifecycle of the indexed
// bytes separately from the rest of the storage subsystem.
func clone(id ident.ID) []byte {
original := id.Bytes()
func clone(original []byte) []byte {
clone := make([]byte, len(original))
copy(clone, original)
return clone
Expand Down
Loading