Skip to content
This repository has been archived by the owner on Dec 1, 2022. It is now read-only.

refactor index key utils #12

Merged
merged 2 commits into from
Apr 17, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions src/common/IndexKeyUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,63 @@

namespace nebula {

// static
// Appends the already-encoded index column values to `raw`, in column order,
// followed by one int32_t byte-length for every STRING column (also in column
// order). Non-string columns contribute no length entry.
void IndexKeyUtils::indexRaw(const IndexValues &values, std::string& raw) {
    // Byte lengths of the STRING columns, collected while the values are written.
    std::vector<int32_t> strLens;
    for (const auto& entry : values) {
        const std::string& encoded = entry.second;
        raw.append(encoded.data(), encoded.size());
        if (entry.first == Value::Type::STRING) {
            strLens.emplace_back(encoded.size());
        }
    }
    // The string lengths trail all of the column bytes.
    for (const int32_t strLen : strLens) {
        raw.append(reinterpret_cast<const char*>(&strLen), sizeof(int32_t));
    }
}

// static
// Builds the kvstore key of a vertex index entry. Layout, in order:
//   int32_t header ((partId << kPartitionOffset) | kIndex type bits)
//   indexId
//   column bytes + string-column lengths (see indexRaw)
//   vId, right-padded with '\0' up to vIdLen bytes
std::string IndexKeyUtils::vertexIndexKey(size_t vIdLen, PartitionID partId,
                                          IndexID indexId, VertexID vId,
                                          const IndexValues& values) {
    const int32_t header =
        (partId << kPartitionOffset) | static_cast<uint32_t>(NebulaKeyType::kIndex);

    std::string result;
    result.reserve(256);
    result.append(reinterpret_cast<const char*>(&header), sizeof(int32_t));
    result.append(reinterpret_cast<const char*>(&indexId), sizeof(IndexID));

    indexRaw(values, result);

    result.append(vId.data(), vId.size());
    // Zero-fill so the vertex id always occupies exactly vIdLen bytes.
    // NOTE(review): the subtraction is unsigned; presumably callers guarantee
    // vId.size() <= vIdLen -- confirm.
    result.append(vIdLen - vId.size(), '\0');
    return result;
}

// static
// Builds the kvstore key of an edge index entry. Layout, in order:
//   int32_t header ((partId << kPartitionOffset) | kIndex type bits)
//   indexId
//   column bytes + string-column lengths (appended by indexRaw)
//   srcId, right-padded with '\0' up to vIdLen bytes
//   rank
//   dstId, right-padded with '\0' up to vIdLen bytes
// NOTE(review): both paddings compute vIdLen - id.size() in unsigned arithmetic;
// presumably callers guarantee the ids are no longer than vIdLen -- confirm.
std::string IndexKeyUtils::edgeIndexKey(size_t vIdLen, PartitionID partId,
IndexID indexId, VertexID srcId,
EdgeRanking rank, VertexID dstId,
const IndexValues& values) {
// Partition id shifted left by kPartitionOffset, OR-ed with the kIndex key-type bits.
int32_t item = (partId << kPartitionOffset) | static_cast<uint32_t>(NebulaKeyType::kIndex);
std::string key;
key.reserve(256);
key.append(reinterpret_cast<const char*>(&item), sizeof(int32_t))
.append(reinterpret_cast<const char*>(&indexId), sizeof(IndexID));
// Encoded column values, followed by the lengths of the string columns.
indexRaw(values, key);
key.append(srcId.data(), srcId.size())
.append(vIdLen - srcId.size(), '\0')
Copy link
Contributor

@darionyaphet darionyaphet Apr 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The zero filling will arise at here ? I mean for example: when we define the vertex size is 6 and current vertex ID is ABCD the graph service will send ABCD\0\0 or ABCD ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The zero filling will arise at here ? I mean for example: when we define the vertex size is 6 and current vertex ID is ABCD the graph service will send ABCD\0\0 or ABCD ?

The graph service should send ABCD, then storage service will convert it to fixed length vid ABCD\0\0

.append(reinterpret_cast<const char*>(&rank), sizeof(EdgeRanking))
.append(dstId.data(), dstId.size())
.append(vIdLen - dstId.size(), '\0');
return key;
}

// static
// Builds the leading bytes common to the keys of one index in one partition:
// the PartitionID-sized header ((partId << kPartitionOffset) | kIndex type bits)
// followed by indexId.
std::string IndexKeyUtils::indexPrefix(PartitionID partId, IndexID indexId) {
    const PartitionID header =
        (partId << kPartitionOffset) | static_cast<uint32_t>(NebulaKeyType::kIndex);

    std::string prefix;
    prefix.reserve(sizeof(PartitionID) + sizeof(IndexID));
    prefix.append(reinterpret_cast<const char*>(&header), sizeof(PartitionID));
    prefix.append(reinterpret_cast<const char*>(&indexId), sizeof(IndexID));
    return prefix;
}

} // namespace nebula

183 changes: 133 additions & 50 deletions src/common/IndexKeyUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,44 +9,59 @@

#include "base/Base.h"
#include "base/StatusOr.h"
#include "thrift/ThriftTypes.h"
#include "interface/gen-cpp2/meta_types.h"
#include "common/Types.h"


namespace nebula {

using VariantType = boost::variant<int64_t, double, bool, std::string>;

using OptVariantType = StatusOr<VariantType>;

using IndexValues = std::vector<std::pair<nebula::meta::cpp2::PropertyType, std::string>>;
using IndexValues = std::vector<std::pair<Value::Type, std::string>>;

/**
* This class supply some utils for transition between Vertex/Edge and key in kvstore.
* This class supplies utilities for index keys in the kvstore.
* */
class IndexKeyUtils final {
public:
~IndexKeyUtils() = default;

static std::string encodeVariant(const VariantType& v) {
switch (v.which()) {
case VAR_INT64:
return encodeInt64(boost::get<int64_t>(v));
case VAR_DOUBLE:
return encodeDouble(boost::get<double>(v));
case VAR_BOOL: {
auto val = boost::get<bool>(v);
static std::string encodeValue(const Value& v) {
switch (v.type()) {
case Value::Type::INT :
return encodeInt64(v.getInt());
case Value::Type::FLOAT :
return encodeDouble(v.getFloat());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

encodeDouble is too good,👍

case Value::Type::BOOL: {
auto val = v.getBool();
std::string raw;
raw.reserve(sizeof(bool));
raw.append(reinterpret_cast<const char*>(&val), sizeof(bool));
return raw;
}
case VAR_STR:
return boost::get<std::string>(v);
default:
std::string errMsg = folly::stringPrintf("Unknown VariantType: %d", v.which());
LOG(ERROR) << errMsg;
case Value::Type::STRING :
return v.getStr();
case Value::Type::DATE : {
std::string buf;
buf.reserve(sizeof(int8_t) * 2 + sizeof(int16_t));
buf.append(reinterpret_cast<const char*>(&v.getDate().year), sizeof(int16_t))
.append(reinterpret_cast<const char*>(&v.getDate().month), sizeof(int8_t))
.append(reinterpret_cast<const char*>(&v.getDate().day), sizeof(int8_t));
return buf;
}
case Value::Type::DATETIME : {
std::string buf;
buf.reserve(sizeof(int32_t) * 2 + sizeof(int16_t) + sizeof(int8_t) * 5);
auto dt = v.getDateTime();
buf.append(reinterpret_cast<const char*>(&dt.year), sizeof(int16_t))
.append(reinterpret_cast<const char*>(&dt.month), sizeof(int8_t))
.append(reinterpret_cast<const char*>(&dt.day), sizeof(int8_t))
.append(reinterpret_cast<const char*>(&dt.hour), sizeof(int8_t))
.append(reinterpret_cast<const char*>(&dt.minute), sizeof(int8_t))
.append(reinterpret_cast<const char*>(&dt.sec), sizeof(int8_t))
.append(reinterpret_cast<const char*>(&dt.microsec), sizeof(int32_t))
.append(reinterpret_cast<const char*>(&dt.timezone), sizeof(int32_t));
return buf;
}
default :
LOG(ERROR) << "Unsupported default value type";
}
return "";
}
Expand Down Expand Up @@ -118,53 +133,121 @@ class IndexKeyUtils final {
return val;
}

static OptVariantType decodeVariant(const folly::StringPiece& raw,
nebula::meta::cpp2::PropertyType type) {
static StatusOr<Value> decodeValue(const folly::StringPiece& raw, Value::Type type) {
Value v;
switch (type) {
case nebula::meta::cpp2::PropertyType::BOOL : {
return *reinterpret_cast<const bool*>(raw.data());
case Value::Type::INT : {
v.setInt(decodeInt64(raw));
break;
}
case Value::Type::FLOAT : {
v.setFloat(decodeDouble(raw));
break;
}
case nebula::meta::cpp2::PropertyType::INT64 :
case nebula::meta::cpp2::PropertyType::TIMESTAMP : {
return decodeInt64(raw);
case Value::Type::BOOL : {
v.setBool(*reinterpret_cast<const bool*>(raw.data()));
break;
}
case nebula::meta::cpp2::PropertyType::DOUBLE :
case nebula::meta::cpp2::PropertyType::FLOAT : {
return decodeDouble(raw);
case Value::Type::STRING : {
v.setStr(raw.str());
break;
}
case nebula::meta::cpp2::PropertyType::STRING : {
return raw.str();
case Value::Type::DATE: {
nebula::Date dt;
memcpy(reinterpret_cast<void*>(&dt.year), &raw[0], sizeof(int16_t));
memcpy(reinterpret_cast<void*>(&dt.month),
&raw[sizeof(int16_t)],
sizeof(int8_t));
memcpy(reinterpret_cast<void*>(&dt.day),
&raw[sizeof(int16_t) + sizeof(int8_t)],
sizeof(int8_t));
v.setDate(dt);
break;
}
case Value::Type::DATETIME: {
nebula::DateTime dt;
memcpy(reinterpret_cast<void*>(&dt.year), &raw[0], sizeof(int16_t));
memcpy(reinterpret_cast<void*>(&dt.month),
&raw[sizeof(int16_t)],
sizeof(int8_t));
memcpy(reinterpret_cast<void*>(&dt.day),
&raw[sizeof(int16_t) + sizeof(int8_t)],
sizeof(int8_t));
memcpy(reinterpret_cast<void*>(&dt.hour),
&raw[sizeof(int16_t) + 2 * sizeof(int8_t)],
sizeof(int8_t));
memcpy(reinterpret_cast<void*>(&dt.minute),
&raw[sizeof(int16_t) + 3 * sizeof(int8_t)],
sizeof(int8_t));
memcpy(reinterpret_cast<void*>(&dt.sec),
&raw[sizeof(int16_t) + 4 * sizeof(int8_t)],
sizeof(int8_t));
memcpy(reinterpret_cast<void*>(&dt.microsec),
&raw[sizeof(int16_t) + 5 * sizeof(int8_t)],
sizeof(int32_t));
memcpy(reinterpret_cast<void*>(&dt.timezone),
&raw[sizeof(int16_t) + 5 * sizeof(int8_t) + sizeof(int32_t)],
sizeof(int32_t));
v.setDateTime(dt);
break;
}
default:
return OptVariantType(Status::Error("Unknown type"));
return Status::Error("Unknown value type");
}
return v;
}

static VertexIntID getIndexVertexIntID(const folly::StringPiece& rawKey) {
CHECK_GE(rawKey.size(), kVertexIndexLen);
auto offset = rawKey.size() - sizeof(VertexIntID);
return *reinterpret_cast<const VertexIntID*>(rawKey.data() + offset);
static VertexIDSlice getIndexVertexID(size_t vIdLen, const folly::StringPiece& rawKey) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If so seems we don't need VertexIntID ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If so seems we don't need VertexIntID ?

Yes, we don't need VertexIntID; the vIdLen should be 8 if this is nebula 1.0 data.

CHECK_GE(rawKey.size(), kVertexIndexLen + vIdLen);
auto offset = rawKey.size() - vIdLen;
return rawKey.subpiece(offset, vIdLen);
}

static VertexIntID getIndexSrcId(const folly::StringPiece& rawKey) {
CHECK_GE(rawKey.size(), kEdgeIndexLen);
auto offset = rawKey.size() -
sizeof(VertexIntID) * 2 - sizeof(EdgeRanking);
return readInt<VertexIntID>(rawKey.data() + offset, sizeof(VertexIntID));
static VertexIDSlice getIndexSrcId(size_t vIdLen, const folly::StringPiece& rawKey) {
CHECK_GE(rawKey.size(), kEdgeIndexLen + vIdLen * 2);
auto offset = rawKey.size() - (vIdLen << 1) - sizeof(EdgeRanking);
return rawKey.subpiece(offset, vIdLen);
}

static VertexIntID getIndexDstId(const folly::StringPiece& rawKey) {
CHECK_GE(rawKey.size(), kEdgeIndexLen);
auto offset = rawKey.size() - sizeof(VertexIntID);
return readInt<VertexIntID>(rawKey.data() + offset, sizeof(VertexIntID));
static VertexIDSlice getIndexDstId(size_t vIdLen, const folly::StringPiece& rawKey) {
CHECK_GE(rawKey.size(), kEdgeIndexLen + vIdLen * 2);
auto offset = rawKey.size() - vIdLen;
return rawKey.subpiece(offset, vIdLen);
}

static EdgeRanking getIndexRank(const folly::StringPiece& rawKey) {
CHECK_GE(rawKey.size(), kEdgeIndexLen);
auto offset = rawKey.size() - sizeof(VertexIntID) - sizeof(EdgeRanking);
static EdgeRanking getIndexRank(size_t vIdLen, const folly::StringPiece& rawKey) {
CHECK_GE(rawKey.size(), kEdgeIndexLen + vIdLen * 2);
auto offset = rawKey.size() - vIdLen - sizeof(EdgeRanking);
return readInt<EdgeRanking>(rawKey.data() + offset, sizeof(EdgeRanking));
}

// Returns true when the leading int32_t of `key`, masked with kTypeMask,
// equals NebulaKeyType::kIndex.
static bool isIndexKey(const folly::StringPiece& key) {
    constexpr int32_t typeLen = static_cast<int32_t>(sizeof(NebulaKeyType));
    const auto keyType = readInt<int32_t>(key.data(), typeLen) & kTypeMask;
    return keyType == static_cast<uint32_t>(NebulaKeyType::kIndex);
}

// Reads the IndexID stored immediately after the PartitionID-sized header of `rawKey`.
static IndexID getIndexId(const folly::StringPiece& rawKey) {
    const char* idPos = rawKey.data() + sizeof(PartitionID);
    return readInt<IndexID>(idPos, sizeof(IndexID));
}

/**
* Generate vertex|edge index key for kv store
**/
// Appends the encoded column values to `raw`, followed by an int32_t length
// for each STRING column.
static void indexRaw(const IndexValues &values, std::string& raw);

// Key of a vertex index entry: header, indexId, column data, then vId
// zero-padded to vIdLen bytes.
static std::string vertexIndexKey(size_t vIdLen, PartitionID partId,
IndexID indexId, VertexID vId,
const IndexValues& values);

// Key of an edge index entry: header, indexId, column data, srcId zero-padded
// to vIdLen bytes, rank, then dstId zero-padded to vIdLen bytes.
static std::string edgeIndexKey(size_t vIdLen, PartitionID partId,
IndexID indexId, VertexID srcId,
EdgeRanking rank, VertexID dstId,
const IndexValues& values);

// Leading bytes shared by the keys of one index in one partition: header + indexId.
static std::string indexPrefix(PartitionID partId, IndexID indexId);

private:
IndexKeyUtils() = delete;

Expand Down
13 changes: 0 additions & 13 deletions src/common/NebulaKeyUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@

namespace nebula {

using VertexIDSlice = folly::StringPiece;

/**
* VertexKeyUtils:
* type(1) + partId(3) + vertexId(*) + tagId(4) + version(8)
Expand Down Expand Up @@ -202,17 +200,6 @@ class NebulaKeyUtils final {
return rawKey.subpiece(0, rawKey.size() - sizeof(int64_t));
}

// Checks whether the type bits in the leading int32_t of `key` (selected by
// kTypeMask) are NebulaKeyType::kIndex.
static bool isIndexKey(const folly::StringPiece& key) {
    constexpr int32_t headerLen = static_cast<int32_t>(sizeof(NebulaKeyType));
    const auto typeBits = readInt<int32_t>(key.data(), headerLen) & kTypeMask;
    const auto indexBits = static_cast<uint32_t>(NebulaKeyType::kIndex);
    return indexBits == typeBits;
}

// Extracts the IndexID located sizeof(PartitionID) bytes into `rawKey`.
static IndexID getIndexId(const folly::StringPiece& rawKey) {
    constexpr auto headerBytes = sizeof(PartitionID);
    return readInt<IndexID>(rawKey.data() + headerBytes, sizeof(IndexID));
}

static void dumpBadKey(const folly::StringPiece& rawKey, size_t expect, size_t vIdLen) {
std::stringstream msg;
msg << "rawKey.size() != expect size"
Expand Down
13 changes: 6 additions & 7 deletions src/common/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ enum class NebulaSystemKeyType : uint32_t {
kSystemPart = 0x00000002,
};

using VertexIDSlice = folly::StringPiece;
using VertexIntID = int64_t;
using IndexID = int32_t;

Expand All @@ -38,7 +39,8 @@ readInt(const char* data, int32_t len) {
static constexpr int32_t kVertexLen = sizeof(PartitionID) + sizeof(TagID) + sizeof(TagVersion);

// size of vertex key except srcId and dstId
static constexpr int32_t kEdgeLen = sizeof(PartitionID) + sizeof(EdgeType) + sizeof(EdgeRanking) + sizeof(EdgeVersion);
static constexpr int32_t kEdgeLen = sizeof(PartitionID) + sizeof(EdgeType) +
sizeof(EdgeRanking) + sizeof(EdgeVersion);

static constexpr int32_t kSystemLen = sizeof(PartitionID) + sizeof(NebulaSystemKeyType);

Expand All @@ -59,13 +61,10 @@ static constexpr uint32_t kEdgeMaskSet = kTagEdgeMask;
// Write Tag by |=
static constexpr uint32_t kTagMaskSet = ~kTagEdgeMask;

static constexpr int32_t kVertexIndexLen = sizeof(PartitionID) + sizeof(IndexID)
+ sizeof(VertexID);
static constexpr int32_t kVertexIndexLen = sizeof(PartitionID) + sizeof(IndexID);

static constexpr int32_t kEdgeIndexLen = sizeof(PartitionID) + sizeof(IndexID)
+ sizeof(VertexID) * 2 + sizeof(EdgeRanking);

static constexpr int32_t kIndexLen = std::min(kVertexIndexLen, kEdgeIndexLen);
static constexpr int32_t kEdgeIndexLen = sizeof(PartitionID) + sizeof(IndexID) +
sizeof(EdgeRanking);

} // namespace nebula
#endif // COMMON_TYPES_H_
Expand Down
27 changes: 22 additions & 5 deletions src/common/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
nebula_add_test(
NAME nebula_key_utils_test
SOURCES NebulaKeyUtilsTest.cpp
OBJECTS $<TARGET_OBJECTS:keyutils_obj>
LIBRARIES gtest
)
NAME
nebula_key_utils_test
SOURCES
NebulaKeyUtilsTest.cpp
OBJECTS
$<TARGET_OBJECTS:keyutils_obj>
LIBRARIES
gtest
)

# Test target index_key_utils_test, built from IndexKeyUtilsTest.cpp.
# It pulls in the keyutils, common_base and common_datatypes object libraries
# and links gtest. (nebula_add_test is a project macro defined elsewhere.)
nebula_add_test(
NAME
index_key_utils_test
SOURCES
IndexKeyUtilsTest.cpp
OBJECTS
$<TARGET_OBJECTS:keyutils_obj>
$<TARGET_OBJECTS:common_base_obj>
$<TARGET_OBJECTS:common_datatypes_obj>
LIBRARIES
gtest
)
Loading