From 5a66a88fc6684df1d5b6b5355e884d4bb86e9406 Mon Sep 17 00:00:00 2001 From: jievince <38901892+jievince@users.noreply.github.com> Date: Sat, 18 Sep 2021 17:02:27 +0800 Subject: [PATCH] Geo spatial: Geography --- src/codec/RowReaderV2.cpp | 11 ++ src/codec/RowWriterV2.cpp | 13 ++ src/codec/RowWriterV2.h | 3 + src/common/CMakeLists.txt | 1 + src/common/base/Base.h | 2 +- src/common/conf/Configuration.h | 25 +-- src/common/datatypes/CMakeLists.txt | 1 + src/common/datatypes/CommonCpp2Ops.h | 2 + src/common/datatypes/Geography.cpp | 76 +++++++++ src/common/datatypes/Geography.h | 90 +++++++++++ src/common/datatypes/GeographyOps-inl.h | 132 ++++++++++++++++ src/common/datatypes/Value.cpp | 114 ++++++++++++++ src/common/datatypes/Value.h | 18 +++ src/common/datatypes/ValueOps-inl.h | 47 ++++++ src/common/datatypes/test/ValueTest.cpp | 1 + src/common/function/CMakeLists.txt | 4 + src/common/function/FunctionManager.cpp | 155 +++++++++++++++++++ src/common/geo/CMakeLists.txt | 11 ++ src/common/geo/GeoIndex.cpp | 163 ++++++++++++++++++++ src/common/geo/GeoIndex.h | 97 ++++++++++++ src/common/geo/GeoParser.cpp | 5 + src/common/geo/GeoParser.h | 5 + src/common/geo/function/Covers.cpp | 74 +++++++++ src/common/geo/function/Covers.h | 20 +++ src/common/geo/function/DWithin.cpp | 154 ++++++++++++++++++ src/common/geo/function/DWithin.h | 32 ++++ src/common/geo/function/Distance.cpp | 115 ++++++++++++++ src/common/geo/function/Distance.h | 22 +++ src/common/geo/function/Intersects.cpp | 74 +++++++++ src/common/geo/function/Intersects.h | 18 +++ src/common/geo/wkb/WKBReader.h | 7 + src/common/geo/wkb/WKBWriter.h | 5 + src/common/geo/wkt/WKTReader.h | 5 + src/common/geo/wkt/WKTWriter.h | 5 + src/common/meta/NebulaSchemaProvider.cpp | 2 + src/common/thread/GenericWorker.h | 2 +- src/common/time/TimezoneInfo.h | 6 +- src/common/utils/IndexKeyUtils.cpp | 3 +- src/common/utils/IndexKeyUtils.h | 36 +++++ src/common/utils/test/CMakeLists.txt | 3 + src/daemons/CMakeLists.txt | 1 + 
src/graph/optimizer/OptimizerUtils.cpp | 5 + src/graph/service/Authenticator.h | 2 +- src/graph/service/GraphService.h | 4 +- src/graph/util/SchemaUtil.cpp | 2 + src/graph/validator/Validator.h | 2 +- src/interface/common.thrift | 7 +- src/interface/meta.thrift | 14 ++ src/kvstore/test/CMakeLists.txt | 3 +- src/meta/CMakeLists.txt | 1 + src/parser/parser.yy | 30 +++- src/parser/scanner.lex | 4 + src/storage/index/LookupBaseProcessor-inl.h | 5 +- src/storage/test/CMakeLists.txt | 1 + src/tools/db-dump/CMakeLists.txt | 1 + src/tools/db-upgrade/CMakeLists.txt | 1 + src/tools/meta-dump/CMakeLists.txt | 1 + src/tools/simple-kv-verify/CMakeLists.txt | 1 + src/tools/storage-perf/CMakeLists.txt | 1 + src/webservice/WebService.h | 4 +- 60 files changed, 1621 insertions(+), 28 deletions(-) create mode 100644 src/common/datatypes/Geography.cpp create mode 100644 src/common/datatypes/Geography.h create mode 100644 src/common/datatypes/GeographyOps-inl.h create mode 100644 src/common/geo/CMakeLists.txt create mode 100644 src/common/geo/GeoIndex.cpp create mode 100644 src/common/geo/GeoIndex.h create mode 100644 src/common/geo/GeoParser.cpp create mode 100644 src/common/geo/GeoParser.h create mode 100644 src/common/geo/function/Covers.cpp create mode 100644 src/common/geo/function/Covers.h create mode 100644 src/common/geo/function/DWithin.cpp create mode 100644 src/common/geo/function/DWithin.h create mode 100644 src/common/geo/function/Distance.cpp create mode 100644 src/common/geo/function/Distance.h create mode 100644 src/common/geo/function/Intersects.cpp create mode 100644 src/common/geo/function/Intersects.h create mode 100644 src/common/geo/wkb/WKBReader.h create mode 100644 src/common/geo/wkb/WKBWriter.h create mode 100644 src/common/geo/wkt/WKTReader.h create mode 100644 src/common/geo/wkt/WKTWriter.h diff --git a/src/codec/RowReaderV2.cpp b/src/codec/RowReaderV2.cpp index ad7c6d2866a..caff6a89838 100644 --- a/src/codec/RowReaderV2.cpp +++ b/src/codec/RowReaderV2.cpp 
@@ -175,6 +175,17 @@ Value RowReaderV2::getValueByIndex(const int64_t index) const noexcept { dt.microsec = microsec; return dt; } + case meta::cpp2::PropertyType::GEOGRAPHY: { + int32_t strOffset; + int32_t strLen; + memcpy(reinterpret_cast(&strOffset), &data_[offset], sizeof(int32_t)); + memcpy(reinterpret_cast(&strLen), &data_[offset + sizeof(int32_t)], sizeof(int32_t)); + if (static_cast(strOffset) == data_.size() && strLen == 0) { + return Geography(); + } + CHECK_LT(strOffset, data_.size()); + return Geography(std::string(&data_[strOffset], strLen)); + } case meta::cpp2::PropertyType::UNKNOWN: break; } diff --git a/src/codec/RowWriterV2.cpp b/src/codec/RowWriterV2.cpp index 9b3723284fe..cb181f08f31 100644 --- a/src/codec/RowWriterV2.cpp +++ b/src/codec/RowWriterV2.cpp @@ -126,6 +126,9 @@ RowWriterV2::RowWriterV2(RowReader& reader) : RowWriterV2(reader.getSchema()) { case Value::Type::DATETIME: set(i, v.moveDateTime()); break; + case Value::Type::GEOGRAPHY: + set(i, v.moveGeography()); + break; default: LOG(FATAL) << "Invalid data: " << v << ", type: " << v.typeName(); } @@ -203,6 +206,8 @@ WriteResult RowWriterV2::setValue(ssize_t index, const Value& val) noexcept { return write(index, val.getTime()); case Value::Type::DATETIME: return write(index, val.getDateTime()); + case Value::Type::GEOGRAPHY: + return write(index, val.getGeography()); default: return WriteResult::TYPE_MISMATCH; } @@ -637,6 +642,7 @@ WriteResult RowWriterV2::write(ssize_t index, folly::StringPiece v) noexcept { auto field = schema_->field(index); auto offset = headerLen_ + numNullBytes_ + field->offset(); switch (field->type()) { + case meta::cpp2::PropertyType::GEOGRAPHY: // write wkb case meta::cpp2::PropertyType::STRING: { if (isSet_[index]) { // The string value has already been set, we need to turn it @@ -755,6 +761,10 @@ WriteResult RowWriterV2::write(ssize_t index, const DateTime& v) noexcept { return WriteResult::SUCCEEDED; } +WriteResult RowWriterV2::write(ssize_t index, const 
Geography& v) noexcept { + return write(index, folly::StringPiece(v.wkb)); +} + WriteResult RowWriterV2::checkUnsetFields() noexcept { DefaultValueContext expCtx; for (size_t i = 0; i < schema_->getNumFields(); i++) { @@ -794,6 +804,9 @@ WriteResult RowWriterV2::checkUnsetFields() noexcept { case Value::Type::DATETIME: r = write(i, defVal.getDateTime()); break; + case Value::Type::GEOGRAPHY: + r = write(i, defVal.getGeography()); + break; default: LOG(FATAL) << "Unsupported default value type: " << defVal.typeName() << ", default value: " << defVal diff --git a/src/codec/RowWriterV2.h b/src/codec/RowWriterV2.h index d3bec72064f..16030a00afd 100644 --- a/src/codec/RowWriterV2.h +++ b/src/codec/RowWriterV2.h @@ -63,6 +63,7 @@ enum class WriteResult { TIMESTAMP (8 bytes) DATE (4 bytes) DATETIME (15 bytes) + GEOGRAPHY (8 bytes) * All except STRING typed properties are stored in-place. The STRING property stored the offset of the string content in the first 4 bytes and the length @@ -188,6 +189,8 @@ class RowWriterV2 { WriteResult write(ssize_t index, const Date& v) noexcept; WriteResult write(ssize_t index, const Time& v) noexcept; WriteResult write(ssize_t index, const DateTime& v) noexcept; + + WriteResult write(ssize_t index, const Geography& v) noexcept; }; } // namespace nebula diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 9122bd1f76f..e9ccafe75fe 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -25,3 +25,4 @@ nebula_add_subdirectory(function) nebula_add_subdirectory(graph) nebula_add_subdirectory(plugin) nebula_add_subdirectory(utils) +nebula_add_subdirectory(geo) diff --git a/src/common/base/Base.h b/src/common/base/Base.h index bae877f2883..aabff0e7b09 100644 --- a/src/common/base/Base.h +++ b/src/common/base/Base.h @@ -67,7 +67,7 @@ #include "common/base/Logging.h" -#define MUST_USE_RESULT __attribute__((warn_unused_result)) +#define NG_MUST_USE_RESULT __attribute__((warn_unused_result)) #define DONT_OPTIMIZE 
__attribute__((optimize("O0"))) #define ALWAYS_INLINE __attribute__((always_inline)) diff --git a/src/common/conf/Configuration.h b/src/common/conf/Configuration.h index 4f2a5c6e6f7..18aa9de8f0d 100644 --- a/src/common/conf/Configuration.h +++ b/src/common/conf/Configuration.h @@ -27,11 +27,11 @@ class Configuration final { /** * Parse from a file */ - Status MUST_USE_RESULT parseFromFile(const std::string &filename); + Status NG_MUST_USE_RESULT parseFromFile(const std::string &filename); /** * Parse from a string buffer */ - Status MUST_USE_RESULT parseFromString(const std::string &content); + Status NG_MUST_USE_RESULT parseFromString(const std::string &content); std::string dumpToString() const; @@ -42,19 +42,20 @@ class Configuration final { * @key item key * @val to hold the item value. */ - Status MUST_USE_RESULT fetchAsInt(const char *key, int64_t &val) const; - Status MUST_USE_RESULT fetchAsDouble(const char *key, double &val) const; - Status MUST_USE_RESULT fetchAsBool(const char *key, bool &val) const; - Status MUST_USE_RESULT fetchAsString(const char *key, std::string &val) const; + Status NG_MUST_USE_RESULT fetchAsInt(const char *key, int64_t &val) const; + Status NG_MUST_USE_RESULT fetchAsDouble(const char *key, double &val) const; + Status NG_MUST_USE_RESULT fetchAsBool(const char *key, bool &val) const; + Status NG_MUST_USE_RESULT fetchAsString(const char *key, std::string &val) const; - Status MUST_USE_RESULT fetchAsIntArray(const char *key, std::vector &val) const; - Status MUST_USE_RESULT fetchAsDoubleArray(const char *key, std::vector &val) const; - Status MUST_USE_RESULT fetchAsBoolArray(const char *key, std::vector &val) const; - Status MUST_USE_RESULT fetchAsStringArray(const char *key, std::vector &val) const; + Status NG_MUST_USE_RESULT fetchAsIntArray(const char *key, std::vector &val) const; + Status NG_MUST_USE_RESULT fetchAsDoubleArray(const char *key, std::vector &val) const; + Status NG_MUST_USE_RESULT fetchAsBoolArray(const char *key, 
std::vector &val) const; + Status NG_MUST_USE_RESULT fetchAsStringArray(const char *key, + std::vector &val) const; - Status MUST_USE_RESULT fetchAsSubConf(const char *key, Configuration &val) const; + Status NG_MUST_USE_RESULT fetchAsSubConf(const char *key, Configuration &val) const; - Status MUST_USE_RESULT upsertStringField(const char *key, const std::string &val); + Status NG_MUST_USE_RESULT upsertStringField(const char *key, const std::string &val); // Iterate through every key in the configuration Status forEachKey(std::function processor) const; diff --git a/src/common/datatypes/CMakeLists.txt b/src/common/datatypes/CMakeLists.txt index 53d0cae9a41..a6cbaf9fad5 100644 --- a/src/common/datatypes/CMakeLists.txt +++ b/src/common/datatypes/CMakeLists.txt @@ -14,6 +14,7 @@ nebula_add_library( Map.cpp List.cpp Set.cpp + Geography.cpp ) nebula_add_subdirectory(test) diff --git a/src/common/datatypes/CommonCpp2Ops.h b/src/common/datatypes/CommonCpp2Ops.h index 5d969c94d2d..8be8fdf89b4 100644 --- a/src/common/datatypes/CommonCpp2Ops.h +++ b/src/common/datatypes/CommonCpp2Ops.h @@ -24,6 +24,7 @@ struct Map; struct Set; struct List; struct DataSet; +struct Geography; } // namespace nebula namespace apache::thrift { @@ -43,6 +44,7 @@ SPECIALIZE_CPP2OPS(nebula::Map); SPECIALIZE_CPP2OPS(nebula::Set); SPECIALIZE_CPP2OPS(nebula::List); SPECIALIZE_CPP2OPS(nebula::DataSet); +SPECIALIZE_CPP2OPS(nebula::Geography); } // namespace apache::thrift diff --git a/src/common/datatypes/Geography.cpp b/src/common/datatypes/Geography.cpp new file mode 100644 index 00000000000..53c0c6a3a7c --- /dev/null +++ b/src/common/datatypes/Geography.cpp @@ -0,0 +1,76 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */ + +#include "common/datatypes/Geography.h" + +#include +#include + +#include + +namespace nebula { + +S2Region* Geography::asS2() const { + // auto geom = WKBReader().read(wkb); + // return s2RegionFromGeom(&geom); + return nullptr; +} + +// S2Region* Geography::s2RegionFromGeom(const geos::geom::Geometry* geom) { +// return new S2Region; +// switch (geom->getGeometryTypeId()) { +// case geos::geom::GEOS_POINT: { +// auto *point = static_cast(geom); +// auto latlng = S2LatLng::FromDegrees(point->getX(), point->getY()); +// return new S2PointRegion(latlng.toPoint()); +// } +// case geos::geom::GEOS_LINESTRING: { +// auot *lineString = static_cast(geom); +// std::vector s2Points; +// latlngs.reserve(lineString->numPoints()); +// for (size_t i = 0; i < lineString->numPoints(); ++i) { +// auto latlng = lineString->getCoordinateN(i); +// s2Points.emplace_back(S2LatLng::FromDegrees(latlng.x, latlng.y).ToPoint()); +// } +// return new S2Polyline(s2Points); +// } +// case geos::geom::GEOS_POLYGON: { +// auto *polygon = static_cast(geom); +// size_t ringNum = 1 + polygon->getNumInteriorRing(); +// std::vector> s2Loops; +// s2Loops.reserve(ringNum); + +// std::vector rings; +// rings.reserve(ringNum); + +// std::vector s2Points; +// for (size_t i = 0; i < rings.size(); ++i) { +// const auto *ring = rings[i]; +// s2Points.clear(); +// s2Points.reserve(ring->numPoints()); +// for (size_t j = 0; j < ring->numPoints(); ++j) { +// auto latlng = ring->getCoordinateN(i); +// s2Points.empalce_back(S2LatLng::FromDegrees(latlng.x, latlng.y).ToPoint()); +// } +// auto *s2Loop = new S2Loop(s2Points); +// s2Loop->Normalize(); +// s2Loops.emplace_back(s2Loop); // make loop be CCW +// return new S2Polygon(s2Loops); +// } +// } +// } +// } + +} // namespace nebula + +namespace std { + +// Inject a customized hash function +std::size_t hash::operator()(const nebula::Geography& h) const noexcept { + return hash{}(h.wkb); +} + +} // namespace std diff --git 
a/src/common/datatypes/Geography.h b/src/common/datatypes/Geography.h new file mode 100644 index 00000000000..139114c6e17 --- /dev/null +++ b/src/common/datatypes/Geography.h @@ -0,0 +1,90 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include "common/datatypes/Value.h" + +class S2Polygon; + +namespace nebula { + +// clang-format off +/* +static const std::unordered_map kShapeTypeToS2Region = { + {ShapeType::Point, S2PointRegion}, // S2PointRegion is a wrapper of S2Point, and it inherits from the S2Region class + {ShapeType::LineString, S2Polyline}, + {ShapeType::Polygon, S2Polygon}, +}; +*/ +// clang-format on + +enum class ShapeType : uint8_t { + Point = 1, + LineString = 2, + Polygon = 3, +}; + +// Do not construct a S2 data when constructing Geography. It's expensive. +// We just construct S2 when doing computation. 
+struct Geography { + std::string wkb; + + Geography() = default; + explicit Geography(const std::string& validWKB) { + // DCHECK(WKB::isValid(wkb)); + wkb = validWKB; + } + + S2Region* asS2() const; + + ShapeType shape() const { + // auto type = WKBReader.readUint32(wkb.substr(1)); + // DCHECK(type >= 1 && type <= 3); + // return static_cast(type); + return static_cast(1); + } + + void clear() { wkb.clear(); } + + void __clear() { clear(); } + + std::string toString() const { return wkb; } + + folly::dynamic toJson() const { return toString(); } + + bool operator==(const Geography& rhs) const { return wkb == rhs.wkb; } + + bool operator!=(const Geography& rhs) const { return !(wkb == rhs.wkb); } + + bool operator<(const Geography& rhs) const { return wkb < rhs.wkb; } + + // private: + // S2Region* s2RegionFromGeom(const geos::geom::Geometry* geom); +}; + +inline std::ostream& operator<<(std::ostream& os, const Geography& g) { return os << g.wkb; } + +} // namespace nebula + +namespace std { + +// Inject a customized hash function +template <> +struct hash { + std::size_t operator()(const nebula::Geography& h) const noexcept; +}; + +} // namespace std diff --git a/src/common/datatypes/GeographyOps-inl.h b/src/common/datatypes/GeographyOps-inl.h new file mode 100644 index 00000000000..8e5804ae593 --- /dev/null +++ b/src/common/datatypes/GeographyOps-inl.h @@ -0,0 +1,132 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */ + +#ifndef COMMON_DATATYPES_GEOGRAPHYOPS_H_ +#define COMMON_DATATYPES_GEOGRAPHYOPS_H_ + +#include +#include +#include + +#include "common/base/Base.h" +#include "common/datatypes/CommonCpp2Ops.h" +#include "common/datatypes/Geography.h" + +namespace apache { +namespace thrift { + +/************************************** + * + * Ops for class Geography + * + *************************************/ +namespace detail { + +template <> +struct TccStructTraits { + static void translateFieldName(MAYBE_UNUSED folly::StringPiece _fname, + MAYBE_UNUSED int16_t& fid, + MAYBE_UNUSED apache::thrift::protocol::TType& _ftype) { + if (_fname == "wkb") { + fid = 1; + _ftype = apache::thrift::protocol::T_STRING; + } + } +}; + +} // namespace detail + +inline constexpr protocol::TType Cpp2Ops::thriftType() { + return apache::thrift::protocol::T_STRUCT; +} + +template +uint32_t Cpp2Ops::write(Protocol* proto, nebula::Geography const* obj) { + uint32_t xfer = 0; + xfer += proto->writeStructBegin("Geography"); + xfer += proto->writeFieldBegin("wkb", apache::thrift::protocol::T_STRING, 1); + xfer += proto->writeString(obj->wkb); + xfer += proto->writeFieldEnd(); + xfer += proto->writeFieldStop(); + xfer += proto->writeStructEnd(); + return xfer; +} + +template +void Cpp2Ops::read(Protocol* proto, nebula::Geography* obj) { + apache::thrift::detail::ProtocolReaderStructReadState readState; + + readState.readStructBegin(proto); + + using apache::thrift::TProtocolException; + + if (UNLIKELY(!readState.advanceToNextField(proto, 0, 1, apache::thrift::protocol::T_STRING))) { + goto _loop; + } +_readField_wkb : { proto->readString(obj->wkb); } + + if (UNLIKELY(!readState.advanceToNextField(proto, 1, 0, apache::thrift::protocol::T_STOP))) { + goto _loop; + } + +_end: + readState.readStructEnd(proto); + + return; + +_loop: + if (readState.fieldType == apache::thrift::protocol::T_STOP) { + goto _end; + } + + if (proto->kUsesFieldNames()) { + detail::TccStructTraits::translateFieldName( + 
readState.fieldName(), readState.fieldId, readState.fieldType); + } + + switch (readState.fieldId) { + case 1: { + if (LIKELY(readState.fieldType == apache::thrift::protocol::T_STRING)) { + goto _readField_wkb; + } else { + goto _skip; + } + } + default: { +_skip: + proto->skip(readState.fieldType); + readState.readFieldEnd(proto); + readState.readFieldBeginNoInline(proto); + goto _loop; + } + } +} + +template +uint32_t Cpp2Ops::serializedSize(Protocol const* proto, + nebula::Geography const* obj) { + uint32_t xfer = 0; + xfer += proto->serializedStructSize("Geography"); + xfer += proto->serializedFieldSize("wkb", apache::thrift::protocol::T_STRING, 1); + xfer += proto->serializedSizeString(obj->wkb); + xfer += proto->serializedSizeStop(); + return xfer; +} + +template +uint32_t Cpp2Ops::serializedSizeZC(Protocol const* proto, + nebula::Geography const* obj) { + uint32_t xfer = 0; + xfer += proto->serializedStructSize("Geography"); + xfer += proto->serializedFieldSize("wkb", apache::thrift::protocol::T_STRING, 1); + xfer += proto->serializedSizeString(obj->wkb); + xfer += proto->serializedSizeStop(); + return xfer; +} + +} // namespace thrift +} // namespace apache +#endif // COMMON_DATATYPES_GEOGRAPHYOPS_H_ diff --git a/src/common/datatypes/Value.cpp b/src/common/datatypes/Value.cpp index 19545de8135..3704ae4ff09 100644 --- a/src/common/datatypes/Value.cpp +++ b/src/common/datatypes/Value.cpp @@ -15,6 +15,7 @@ #include "common/datatypes/DataSet.h" #include "common/datatypes/Edge.h" +#include "common/datatypes/Geography.h" #include "common/datatypes/List.h" #include "common/datatypes/Map.h" #include "common/datatypes/Path.h" @@ -64,6 +65,9 @@ std::size_t hash::operator()(const nebula::Value& v) const noexce case nebula::Value::Type::LIST: { return hash()(v.getList()); } + case nebula::Value::Type::GEOGRAPHY: { + return hash()(v.getGeography()); + } case nebula::Value::Type::MAP: { LOG(FATAL) << "Hash for MAP has not been implemented"; } @@ -164,6 +168,10 @@ 
Value::Value(Value&& rhs) noexcept : type_(Value::Type::__EMPTY__) { setG(std::move(rhs.value_.gVal)); break; } + case Type::GEOGRAPHY: { + setGG(std::move(rhs.value_.ggVal)); + break; + } default: { assert(false); break; @@ -240,6 +248,10 @@ Value::Value(const Value& rhs) : type_(Value::Type::__EMPTY__) { setG(rhs.value_.gVal); break; } + case Type::GEOGRAPHY: { + setGG(rhs.value_.ggVal); + break; + } default: { assert(false); break; @@ -333,6 +345,13 @@ Value::Value(const DataSet& v) { Value::Value(DataSet&& v) { setG(std::make_unique(std::move(v))); } +Value::Value(const Geography& v) { + auto c = std::make_unique(v); + setGG(std::move(c)); +} + +Value::Value(Geography&& v) { setGG(std::make_unique(std::move(v))); } + const std::string& Value::typeName() const { static const std::unordered_map typeNames = { {Type::__EMPTY__, "__EMPTY__"}, @@ -351,6 +370,7 @@ const std::string& Value::typeName() const { {Type::MAP, "map"}, {Type::SET, "set"}, {Type::DATASET, "dataset"}, + {Type::GEOGRAPHY, "geography"}, }; static const std::unordered_map nullTypes = { @@ -599,6 +619,21 @@ void Value::setDataSet(std::unique_ptr&& v) { setG(std::move(v)); } +void Value::setGeography(const Geography& v) { + clear(); + setGG(v); +} + +void Value::setGeography(Geography&& v) { + clear(); + setGG(std::move(v)); +} + +void Value::setGeography(std::unique_ptr&& v) { + clear(); + setGG(std::move(v)); +} + const NullType& Value::getNull() const { CHECK_EQ(type_, Type::NULLVALUE); return value_.nVal; @@ -709,6 +744,16 @@ const DataSet* Value::getDataSetPtr() const { return value_.gVal.get(); } +const Geography& Value::getGeography() const { + CHECK_EQ(type_, Type::GEOGRAPHY); + return *(value_.ggVal); +} + +const Geography* Value::getGeographyPtr() const { + CHECK_EQ(type_, Type::GEOGRAPHY); + return value_.ggVal.get(); +} + NullType& Value::mutableNull() { CHECK_EQ(type_, Type::NULLVALUE); return value_.nVal; @@ -784,6 +829,11 @@ DataSet& Value::mutableDataSet() { return *(value_.gVal); } 
+Geography& Value::mutableGeography() { + CHECK_EQ(type_, Type::GEOGRAPHY); + return *(value_.ggVal); +} + NullType Value::moveNull() { CHECK_EQ(type_, Type::NULLVALUE); NullType v = std::move(value_.nVal); @@ -889,6 +939,13 @@ DataSet Value::moveDataSet() { return ds; } +Geography Value::moveGeography() { + CHECK_EQ(type_, Type::GEOGRAPHY); + Geography v = std::move(*(value_.ggVal)); + clear(); + return v; +} + void Value::clear() { switch (type_) { case Type::__EMPTY__: { @@ -954,6 +1011,10 @@ void Value::clear() { destruct(value_.gVal); break; } + case Type::GEOGRAPHY: { + destruct(value_.ggVal); + break; + } } type_ = Type::__EMPTY__; } @@ -1027,6 +1088,10 @@ Value& Value::operator=(Value&& rhs) noexcept { setG(std::move(rhs.value_.gVal)); break; } + case Type::GEOGRAPHY: { + setGG(std::move(rhs.value_.ggVal)); + break; + } default: { assert(false); break; @@ -1105,6 +1170,10 @@ Value& Value::operator=(const Value& rhs) { setG(rhs.value_.gVal); break; } + case Type::GEOGRAPHY: { + setGG(rhs.value_.ggVal); + break; + } default: { assert(false); break; @@ -1344,6 +1413,26 @@ void Value::setG(DataSet&& v) { new (std::addressof(value_.gVal)) std::unique_ptr(new DataSet(std::move(v))); } +void Value::setGG(const std::unique_ptr& v) { + type_ = Type::GEOGRAPHY; + new (std::addressof(value_.ggVal)) std::unique_ptr(new Geography(*v)); +} + +void Value::setGG(std::unique_ptr&& v) { + type_ = Type::GEOGRAPHY; + new (std::addressof(value_.ggVal)) std::unique_ptr(std::move(v)); +} + +void Value::setGG(const Geography& v) { + type_ = Type::GEOGRAPHY; + new (std::addressof(value_.ggVal)) std::unique_ptr(new Geography(v)); +} + +void Value::setGG(Geography&& v) { + type_ = Type::GEOGRAPHY; + new (std::addressof(value_.ggVal)) std::unique_ptr(new Geography(std::move(v))); +} + // Convert Nebula::Value to a value compatible with Json standard // DATE, TIME, DATETIME will be converted to strings in UTC // VERTEX, EDGES, PATH will be converted to objects @@ -1403,6 +1492,9 @@ 
folly::dynamic Value::toJson() const { } case Value::Type::DATASET: { return getDataSet().toJson(); + } + case Value::Type::GEOGRAPHY: { + return getGeography().toJson(); } // no default so the compiler will warning when lack } @@ -1521,6 +1613,9 @@ std::string Value::toString() const { } case Value::Type::DATASET: { return getDataSet().toString(); + } + case Value::Type::GEOGRAPHY: { + return getGeography().toString(); } // no default so the compiler will warning when lack } @@ -1713,6 +1808,9 @@ Value Value::lessThan(const Value& v) const { case Value::Type::DATASET: { return getDataSet() < v.getDataSet(); } + case Value::Type::GEOGRAPHY: { + return getGeography() < v.getGeography(); + } case Value::Type::NULLVALUE: case Value::Type::__EMPTY__: { return kNullBadType; @@ -1800,6 +1898,9 @@ Value Value::equal(const Value& v) const { case Value::Type::DATASET: { return getDataSet() == v.getDataSet(); } + case Value::Type::GEOGRAPHY: { + return getGeography() == v.getGeography(); + } case Value::Type::NULLVALUE: case Value::Type::__EMPTY__: { return false; @@ -1881,6 +1982,10 @@ std::ostream& operator<<(std::ostream& os, const Value::Type& type) { os << "DATASET"; break; } + case Value::Type::GEOGRAPHY: { + os << "GEOGRAPHY"; + break; + } default: { os << "__UNKNOWN__"; break; @@ -2075,6 +2180,9 @@ Value operator+(const Value& lhs, const Value& rhs) { case Value::Type::DATASET: { return Value::kNullBadType; } + case Value::Type::GEOGRAPHY: { + return Value::kNullBadType; + } case Value::Type::__EMPTY__: { return Value::kEmpty; } @@ -2494,6 +2602,9 @@ bool operator<(const Value& lhs, const Value& rhs) { // TODO: return false; } + case Value::Type::GEOGRAPHY: { + return lhs.getGeography() < rhs.getGeography(); + } case Value::Type::NULLVALUE: case Value::Type::__EMPTY__: { return false; @@ -2578,6 +2689,9 @@ bool operator==(const Value& lhs, const Value& rhs) { case Value::Type::DATASET: { return lhs.getDataSet() == rhs.getDataSet(); } + case Value::Type::GEOGRAPHY: { 
+ return lhs.getGeography() == rhs.getGeography(); + } case Value::Type::NULLVALUE: case Value::Type::__EMPTY__: { return false; diff --git a/src/common/datatypes/Value.h b/src/common/datatypes/Value.h index 57dc60df4e4..abd2f21173f 100644 --- a/src/common/datatypes/Value.h +++ b/src/common/datatypes/Value.h @@ -32,6 +32,7 @@ struct Map; struct List; struct Set; struct DataSet; +struct Geography; enum class NullType { __NULL__ = 0, @@ -76,6 +77,7 @@ struct Value { MAP = 1UL << 12, SET = 1UL << 13, DATASET = 1UL << 14, + GEOGRAPHY = 1UL << 15, NULLVALUE = 1UL << 63, }; @@ -127,6 +129,8 @@ struct Value { Value(Set&& v); // NOLINT Value(const DataSet& v); // NOLINT Value(DataSet&& v); // NOLINT + Value(const Geography& v); // NOLINT + Value(Geography&& v); // NOLINT ~Value() { clear(); } Type type() const noexcept { return type_; } @@ -159,6 +163,7 @@ struct Value { bool isMap() const { return type_ == Type::MAP; } bool isSet() const { return type_ == Type::SET; } bool isDataSet() const { return type_ == Type::DATASET; } + bool isGeography() const { return type_ == Type::GEOGRAPHY; } void clear(); @@ -211,6 +216,9 @@ struct Value { void setDataSet(const DataSet& v); void setDataSet(DataSet&& v); void setDataSet(std::unique_ptr&& v); + void setGeography(const Geography& v); + void setGeography(Geography&& v); + void setGeography(std::unique_ptr&& v); const NullType& getNull() const; const bool& getBool() const; @@ -234,6 +242,8 @@ struct Value { const Set* getSetPtr() const; const DataSet& getDataSet() const; const DataSet* getDataSetPtr() const; + const Geography& getGeography() const; + const Geography* getGeographyPtr() const; NullType moveNull(); bool moveBool(); @@ -250,6 +260,7 @@ struct Value { Map moveMap(); Set moveSet(); DataSet moveDataSet(); + Geography moveGeography(); NullType& mutableNull(); bool& mutableBool(); @@ -266,6 +277,7 @@ struct Value { Map& mutableMap(); Set& mutableSet(); DataSet& mutableDataSet(); + Geography& mutableGeography(); static 
const Value& null() noexcept { return kNullValue; } @@ -301,6 +313,7 @@ struct Value { std::unique_ptr mVal; std::unique_ptr uVal; std::unique_ptr gVal; + std::unique_ptr ggVal; Storage() {} ~Storage() {} @@ -372,6 +385,11 @@ struct Value { void setG(std::unique_ptr&& v); void setG(const DataSet& v); void setG(DataSet&& v); + // Geography value + void setGG(const std::unique_ptr& v); + void setGG(std::unique_ptr&& v); + void setGG(const Geography& v); + void setGG(Geography&& v); }; static_assert(sizeof(Value) == 16UL, "The size of Value should be 16UL"); diff --git a/src/common/datatypes/ValueOps-inl.h b/src/common/datatypes/ValueOps-inl.h index 124151ee8b4..32982081d2d 100644 --- a/src/common/datatypes/ValueOps-inl.h +++ b/src/common/datatypes/ValueOps-inl.h @@ -17,6 +17,7 @@ #include "common/datatypes/DataSetOps-inl.h" #include "common/datatypes/DateOps-inl.h" #include "common/datatypes/EdgeOps-inl.h" +#include "common/datatypes/GeographyOps-inl.h" #include "common/datatypes/ListOps-inl.h" #include "common/datatypes/MapOps-inl.h" #include "common/datatypes/PathOps-inl.h" @@ -79,6 +80,9 @@ struct TccStructTraits { } else if (_fname == "gVal") { fid = 15; _ftype = apache::thrift::protocol::T_STRUCT; + } else if (_fname == "ggVal") { + fid = 16; + _ftype = apache::thrift::protocol::T_STRUCT; } } }; @@ -229,6 +233,18 @@ uint32_t Cpp2Ops::write(Protocol* proto, nebula::Value const* obj xfer += proto->writeFieldEnd(); break; } + case nebula::Value::Type::GEOGRAPHY: { + xfer += proto->writeFieldBegin("ggVal", protocol::T_STRUCT, 16); + if (obj->getGeographyPtr()) { + xfer += Cpp2Ops::write(proto, obj->getGeographyPtr()); + } else { + xfer += proto->writeStructBegin("Geography"); + xfer += proto->writeStructEnd(); + xfer += proto->writeFieldStop(); + } + xfer += proto->writeFieldEnd(); + break; + } case nebula::Value::Type::__EMPTY__: { break; } @@ -409,6 +425,17 @@ void Cpp2Ops::read(Protocol* proto, nebula::Value* obj) { } break; } + case 16: { + if 
(readState.fieldType == apache::thrift::protocol::T_STRUCT) { + obj->setGeography(nebula::Geography()); + auto ptr = std::make_unique(); + Cpp2Ops::read(proto, ptr.get()); + obj->setGeography(std::move(ptr)); + } else { + proto->skip(readState.fieldType); + } + break; + } default: { proto->skip(readState.fieldType); break; @@ -543,6 +570,16 @@ uint32_t Cpp2Ops::serializedSize(Protocol const* proto, nebula::V } break; } + case nebula::Value::Type::GEOGRAPHY: { + xfer += proto->serializedFieldSize("ggVal", protocol::T_STRUCT, 16); + if (obj->getGeographyPtr()) { + xfer += Cpp2Ops::serializedSize(proto, obj->getGeographyPtr()); + } else { + xfer += proto->serializedStructSize("Geography"); + xfer += proto->serializedSizeStop(); + } + break; + } case nebula::Value::Type::__EMPTY__: { break; } @@ -671,6 +708,16 @@ uint32_t Cpp2Ops::serializedSizeZC(Protocol const* proto, nebula: } break; } + case nebula::Value::Type::GEOGRAPHY: { + xfer += proto->serializedFieldSize("ggVal", protocol::T_STRUCT, 16); + if (obj->getGeographyPtr()) { + xfer += Cpp2Ops::serializedSizeZC(proto, obj->getGeographyPtr()); + } else { + xfer += proto->serializedStructSize("Geography"); + xfer += proto->serializedSizeStop(); + } + break; + } case nebula::Value::Type::__EMPTY__: { break; } diff --git a/src/common/datatypes/test/ValueTest.cpp b/src/common/datatypes/test/ValueTest.cpp index e29d3430c4e..67175a22bfc 100644 --- a/src/common/datatypes/test/ValueTest.cpp +++ b/src/common/datatypes/test/ValueTest.cpp @@ -11,6 +11,7 @@ #include "common/datatypes/DataSet.h" #include "common/datatypes/Date.h" #include "common/datatypes/Edge.h" +#include "common/datatypes/Geography.h" #include "common/datatypes/List.h" #include "common/datatypes/Map.h" #include "common/datatypes/Path.h" diff --git a/src/common/function/CMakeLists.txt b/src/common/function/CMakeLists.txt index f7c01e8ebb3..e32d8cabed4 100644 --- a/src/common/function/CMakeLists.txt +++ b/src/common/function/CMakeLists.txt @@ -6,6 +6,10 @@ 
nebula_add_library( function_manager_obj OBJECT FunctionManager.cpp + ../geo/function/Covers.cpp + ../geo/function/Distance.cpp + ../geo/function/DWithin.cpp + ../geo/function/Intersects.cpp ) nebula_add_library( diff --git a/src/common/function/FunctionManager.cpp b/src/common/function/FunctionManager.cpp index cd068125e6c..8d6a194228c 100644 --- a/src/common/function/FunctionManager.cpp +++ b/src/common/function/FunctionManager.cpp @@ -17,6 +17,10 @@ #include "common/datatypes/Set.h" #include "common/datatypes/Vertex.h" #include "common/expression/Expression.h" +#include "common/geo/function/Covers.h" +#include "common/geo/function/DWithin.h" +#include "common/geo/function/Distance.h" +#include "common/geo/function/Intersects.h" #include "common/thrift/ThriftTypes.h" #include "common/time/TimeUtils.h" #include "common/time/WallClock.h" @@ -302,6 +306,46 @@ std::unordered_map> FunctionManager::typ TypeSignature({Value::Type::DATASET, Value::Type::INT, Value::Type::STRING}, Value::Type::__EMPTY__), }}, + // geo builder + {"st_geogfromtext", + { + TypeSignature({Value::Type::STRING}, Value::Type::GEOGRAPHY), + }}, + {"st_geogfromwkb", + { + TypeSignature({Value::Type::STRING}, Value::Type::GEOGRAPHY), + }}, + // geo predicate + {"st_intersects", + { + TypeSignature({Value::Type::GEOGRAPHY, Value::Type::GEOGRAPHY}, Value::Type::BOOL), + }}, + {"st_covers", + { + TypeSignature({Value::Type::GEOGRAPHY, Value::Type::GEOGRAPHY}, Value::Type::BOOL), + }}, + {"st_coveredby", + { + TypeSignature({Value::Type::GEOGRAPHY, Value::Type::GEOGRAPHY}, Value::Type::BOOL), + }}, + {"st_dwithin", + { + TypeSignature({Value::Type::GEOGRAPHY, Value::Type::GEOGRAPHY, Value::Type::FLOAT}, + Value::Type::BOOL), + }}, + // geo analysis + {"st_distance", + { + TypeSignature({Value::Type::GEOGRAPHY, Value::Type::GEOGRAPHY}, Value::Type::FLOAT), + }}, + {"st_astext", + { + TypeSignature({Value::Type::GEOGRAPHY}, Value::Type::STRING), + }}, + {"st_aswkb", + { + 
TypeSignature({Value::Type::GEOGRAPHY}, Value::Type::STRING), + }}, }; // static @@ -2239,6 +2283,117 @@ FunctionManager::FunctionManager() { return folly::join(args[0].get().getStr(), result); }; } + { + auto &attr = functions_["st_geogfromtext"]; + attr.minArity_ = 1; + attr.maxArity_ = 1; + attr.isPure_ = true; + attr.body_ = [](const auto &args) -> Value { + // const std::string &wkt = args[0].get().getStr(); + // auto geom = WKTReader().read(wkt); + // if (!validateGeom(geom)) { + // return Value::kNullBadData; + // } + // std::ostringstream oss; + // WKBWriter().writeHEX(geom, oss); + // std::string wkb = stream.str(); + // return Geography(wkb); + UNUSED(args); + return Geography(); + }; + } + { + auto &attr = functions_["st_geogfromwkb"]; + attr.minArity_ = 1; + attr.maxArity_ = 1; + attr.isPure_ = true; + attr.body_ = [](const auto &args) -> Value { + // const std::string &wkb = args[0].get().getStr(); + // auto geom = WKBReader().read(wkb); + // if (!validateGeom(wkb)) { + // return Value::kNullBadData; + // } + // return Geography(wkb); + UNUSED(args); + return Geography(); + }; + } + { + auto &attr = functions_["st_intersects"]; + attr.minArity_ = 2; + attr.maxArity_ = 2; + attr.isPure_ = true; + attr.body_ = [](const auto &args) -> Value { + return intersects(args[0].get().getGeography(), args[1].get().getGeography()); + }; + } + { + auto &attr = functions_["st_covers"]; + attr.minArity_ = 2; + attr.maxArity_ = 2; + attr.isPure_ = true; + attr.body_ = [](const auto &args) -> Value { + return covers(args[0].get().getGeography(), args[1].get().getGeography()); + }; + } + { + auto &attr = functions_["st_coveredby"]; + attr.minArity_ = 2; + attr.maxArity_ = 2; + attr.isPure_ = true; + attr.body_ = [](const auto &args) -> Value { + return coveredBy(args[0].get().getGeography(), args[1].get().getGeography()); + }; + } + { + auto &attr = functions_["st_dwithin"]; + attr.minArity_ = 3; + attr.maxArity_ = 3; + attr.isPure_ = true; + attr.body_ = [](const auto 
&args) -> Value { + return dWithin(args[0].get().getGeography(), + args[1].get().getGeography(), + args[2].get().getFloat(), + true); + }; + } + { + auto &attr = functions_["st_distance"]; + attr.minArity_ = 2; + attr.maxArity_ = 2; + attr.isPure_ = true; + attr.body_ = [](const auto &args) -> Value { + return distance(args[0].get().getGeography(), args[1].get().getGeography()); + }; + } + { + auto &attr = functions_["st_astext"]; + attr.minArity_ = 1; + attr.maxArity_ = 1; + attr.isPure_ = true; + attr.body_ = [](const auto &args) -> Value { + // const Geography &g = args[0].get().getGeography(); + // const std::string &wkb = g.wkb; + // auto geom = WKBReader().read(wkb); + // std::string wkt = WKTWriter().write(geom); + // return wkt; + UNUSED(args); + return ""; + }; + } + { + auto &attr = functions_["st_aswkb"]; + attr.minArity_ = 2; + attr.maxArity_ = 2; + attr.isPure_ = true; + attr.body_ = [](const auto &args) -> Value { + // const Geography &g = args[0].get().getGeography(); + // const std::string &wkb = g.wkb; + // return wkb; + UNUSED(args); + return ""; + }; + } } // NOLINT // static diff --git a/src/common/geo/CMakeLists.txt b/src/common/geo/CMakeLists.txt new file mode 100644 index 00000000000..b55b7a8071d --- /dev/null +++ b/src/common/geo/CMakeLists.txt @@ -0,0 +1,11 @@ +# Copyright (c) 2020 vesoft inc. All rights reserved. +# +# This source code is licensed under Apache 2.0 License, +# attached with Common Clause Condition 1.0, found in the LICENSES directory. + +nebula_add_library( + geo_index_obj OBJECT + GeoIndex.cpp +) + +# nebula_add_subdirectory(test) diff --git a/src/common/geo/GeoIndex.cpp b/src/common/geo/GeoIndex.cpp new file mode 100644 index 00000000000..2934baa55e6 --- /dev/null +++ b/src/common/geo/GeoIndex.cpp @@ -0,0 +1,163 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This sourc_e code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */ + +#include "common/geo/GeoIndex.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "common/datatypes/Geography.h" +#include "common/utils/IndexKeyUtils.h" +#include "interface/gen-cpp2/storage_types.h" + +namespace nebula { + +storage::cpp2::IndexColumnHint ScanRange::toIndexColumnHint() { + storage::cpp2::IndexColumnHint hint; + // set_column_name should be called later + if (isRangeScan) { + hint.set_scan_type(storage::cpp2::ScanType::RANGE); + hint.set_begin_value( + IndexKeyUtils::encodeUint64(rangeMin)); // Encode uint64_t as string ahead of time + hint.set_end_value(IndexKeyUtils::encodeUint64(rangeMax)); + } else { + hint.set_scan_type(storage::cpp2::ScanType::PREFIX); + hint.set_begin_value(IndexKeyUtils::encodeUint64(rangeMin)); + } + return hint; +} + +std::vector GeographyIndex::indexCells(const Geography& g) const noexcept { + auto cells = coveringCells(g); + std::vector cellIds; + cellIds.reserve(cells.size()); + for (auto& cell : cells) { + cellIds.push_back(cell.id()); + } + return cellIds; +} + +std::vector GeographyIndex::dWithin(const Geography& g, double distance) const noexcept { + auto* r = g.asS2(); + S1Angle radius = S2Earth::ToAngle(util::units::Meters(distance)); + switch (g.shape()) { + case ShapeType::Point: { + const S2Point& gPoint = static_cast(r)->point(); + S2Cap gCap(gPoint, radius); + auto cells = coveringCells(gCap); + return scanRange(cells); + } + case ShapeType::LineString: { + S2Polyline* gLine = static_cast(r); + MutableS2ShapeIndex index; + index.Add(std::make_unique(gLine)); + S2ShapeIndexBufferedRegion gBuffer(&index, radius); + auto cells = coveringCells(gBuffer); + return scanRange(cells); + } + case ShapeType::Polygon: { + S2Polygon* gPolygon = static_cast(r); + S2ShapeIndexBufferedRegion gBuffer(&gPolygon->index(), radius); + auto cells = coveringCells(gBuffer); + return scanRange(cells); + } + default: + LOG(FATAL) 
+ << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return {}; + } +} + +std::vector GeographyIndex::coveringCells(const Geography& g) const noexcept { + auto* r = g.asS2(); + // Currently region coverer options doesn't work for point. Point always use level 30. + if (g.shape() == ShapeType::Point) { + const S2Point& gPoint = static_cast(r)->point(); + return {S2CellId(gPoint)}; + } + + S2RegionCoverer rc(rcParams_.s2RegionCovererOpts()); + std::vector covering; + rc.GetCovering(*r, &covering); + // 1. NO NEED TO CALL S2RegionCoverer::CanonicalizeCovering(covering), because the covering is + // already canonical, which means that is sorted, non-overlapping and satisfy the desired + // constraints min_level, max_level. + // 2. DO NOT CALL S2CellUnion::Normalize(covering), it will replacing groups of 4 child cells by + // their parent cell, In this case, it may cause the covering don't satisfy the desired + // constraints min_level. + return covering; +} + +std::vector GeographyIndex::coveringCells(const S2Region& r) const noexcept { + S2RegionCoverer rc(rcParams_.s2RegionCovererOpts()); + std::vector covering; + rc.GetCovering(r, &covering); + return covering; +} + +std::vector GeographyIndex::ancestorCells( + const std::vector& cells) const noexcept { + // DCHECK(rc.IsCanonical(cells)); + std::vector ancestors; + std::unordered_set seen; + for (const auto& cellId : cells) { + for (auto l = cellId.level() - 1; l >= rcParams_.minCellLevel_; --l) { + S2CellId parentCellId = cellId.parent(l); + if (seen.find(parentCellId) != seen.end()) { + break; + } + seen.emplace(parentCellId); + ancestors.push_back(parentCellId); + } + } + // The ancestors here is non-overlapping but unsorted. Do we need sort it? + // May need to call S2RegionCoverer::CanonicalizeCovering(&ancestors)? 
+ return ancestors; +} + +std::vector GeographyIndex::scanRange( + const std::vector& cells) const noexcept { + std::vector scanRanges; + for (const S2CellId& cellId : cells) { + if (cellId.is_leaf()) { + scanRanges.emplace_back(cellId.id()); + } else { + scanRanges.emplace_back(cellId.range_min().id(), cellId.range_max().id()); + } + } + + if (!pointsOnly_) { + auto ancestors = ancestorCells(cells); + for (const S2CellId& cellId : ancestors) { + scanRanges.emplace_back(cellId.id()); + } + } + + return scanRanges; +} + +} // namespace nebula + +namespace std { + +// Inject a customized hash function +std::size_t hash::operator()(const S2CellId& c) const noexcept { + return hash{}(c.id()); +} + +} // namespace std diff --git a/src/common/geo/GeoIndex.h b/src/common/geo/GeoIndex.h new file mode 100644 index 00000000000..cf310eec122 --- /dev/null +++ b/src/common/geo/GeoIndex.h @@ -0,0 +1,97 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#pragma once + +#include + +#include +#include + +#include "common/datatypes/Geography.h" + +namespace nebula { + +namespace storage { +namespace cpp2 { +class IndexColumnHint; +} // namespace cpp2 +} // namespace storage + +struct RegionCoverParams { + int minCellLevel_ = 4; + int maxCellLevel_ = 23; // 30? 
+ int maxCellNum_ = 8; + + RegionCoverParams() = default; + + RegionCoverParams(int minLevl, int maxLevl, int maxCells) + : minCellLevel_(minLevl), maxCellLevel_(maxLevl), maxCellNum_(maxCells) {} + + S2RegionCoverer::Options s2RegionCovererOpts() const { + S2RegionCoverer::Options opts; + opts.set_min_level(minCellLevel_); + opts.set_max_level(maxCellLevel_); + opts.set_max_cells(maxCellNum_); + return opts; + } +}; + +// scan type: PREFIX or RANGE +struct ScanRange { + uint64_t rangeMin; + uint64_t rangeMax; + bool isRangeScan; + + ScanRange(uint64_t min, uint64_t max) : rangeMin(min), rangeMax(max), isRangeScan(true) {} + + explicit ScanRange(uint64_t min) : rangeMin(min), isRangeScan(false) {} + + storage::cpp2::IndexColumnHint toIndexColumnHint(); +}; + +class GeographyIndex { + public: + explicit GeographyIndex(const RegionCoverParams& params, bool pointsOnly = false) + : rcParams_(params), pointsOnly_(pointsOnly) {} + + // build index + std::vector indexCells(const Geography& g) const noexcept; + + // query index + std::vector covers(const Geography& g) const noexcept; + + std::vector coveredBy(const Geography& g) const noexcept; + + std::vector intersects(const Geography& g) const noexcept; + + std::vector dWithin(const Geography& g, double distance) const noexcept; + + private: + std::vector coveringCells(const Geography& g) const noexcept; + std::vector coveringCells(const S2Region& r) const noexcept; + + std::vector ancestorCells(const std::vector& cells) const noexcept; + + std::vector scanRange(const std::vector& cells) const noexcept; + + private: + RegionCoverParams rcParams_; + bool pointsOnly_{ + false}; // For the column Geography(Point), we don't need to build ancestor cells +}; + +} // namespace nebula + +namespace std { + +// Inject a customized hash function +template <> +struct hash { + std::size_t operator()(const S2CellId& h) const noexcept; +}; + +} // namespace std diff --git a/src/common/geo/GeoParser.cpp b/src/common/geo/GeoParser.cpp 
new file mode 100644 index 00000000000..2ab9cc62b19 --- /dev/null +++ b/src/common/geo/GeoParser.cpp @@ -0,0 +1,5 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ diff --git a/src/common/geo/GeoParser.h b/src/common/geo/GeoParser.h new file mode 100644 index 00000000000..2ab9cc62b19 --- /dev/null +++ b/src/common/geo/GeoParser.h @@ -0,0 +1,5 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ diff --git a/src/common/geo/function/Covers.cpp b/src/common/geo/function/Covers.cpp new file mode 100644 index 00000000000..cf9341f043c --- /dev/null +++ b/src/common/geo/function/Covers.cpp @@ -0,0 +1,74 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "common/geo/function/Covers.h" + +#include +#include + +namespace nebula { + +// Covers returns whether geography a covers geography b. +// If no point in b lies exterior of a, a covers b. 
+// http://lin-ear-th-inking.blogspot.com/2007/06/subtleties-of-ogc-covers-spatial.html +bool covers(const Geography& a, const Geography& b) { + auto* aRegion = a.asS2(); + auto* bRegion = b.asS2(); + + switch (a.shape()) { + case ShapeType::Point: { + switch (b.shape()) { + case ShapeType::Point: + return static_cast(aRegion)->Contains( + static_cast(bRegion)->point()); + case ShapeType::LineString: + return false; + case ShapeType::Polygon: + return false; + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + case ShapeType::LineString: { + S2Polyline* aLine = static_cast(aRegion); + switch (b.shape()) { + case ShapeType::Point: + return aLine->MayIntersect(S2Cell(static_cast(bRegion)->point())); + case ShapeType::LineString: + return aLine->NearlyCovers(*static_cast(bRegion), S1Angle::Radians(1e-15)); + case ShapeType::Polygon: + return false; + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + case ShapeType::Polygon: { + S2Polygon* aPolygon = static_cast(aRegion); + switch (b.shape()) { + case ShapeType::Point: + return aPolygon->Contains(static_cast(bRegion)->point()); + case ShapeType::LineString: + return aPolygon->Contains(*static_cast(bRegion)); + case ShapeType::Polygon: + return aPolygon->Contains(static_cast(bRegion)); + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + } + + return false; +} + +bool coveredBy(const Geography& a, const Geography& b) { return covers(b, a); } + +} // namespace nebula diff --git a/src/common/geo/function/Covers.h b/src/common/geo/function/Covers.h new file mode 100644 index 00000000000..913ca916cbf --- /dev/null +++ b/src/common/geo/function/Covers.h @@ -0,0 +1,20 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. 
+ * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#pragma once + +#include "common/datatypes/Geography.h" + +namespace nebula { + +// Covers returns whether geography b covers geography b. +// If no point in b lies exterior of b, a covers b. +// http://lin-ear-th-inking.blogspot.com/2007/06/subtleties-of-ogc-covers-spatial.html +bool covers(const Geography& a, const Geography& b); + +bool coveredBy(const Geography& a, const Geography& b); + +} // namespace nebula diff --git a/src/common/geo/function/DWithin.cpp b/src/common/geo/function/DWithin.cpp new file mode 100644 index 00000000000..0a173ca882c --- /dev/null +++ b/src/common/geo/function/DWithin.cpp @@ -0,0 +1,154 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ +#include "common/geo/function/DWithin.h" + +#include +#include +#include +#include + +namespace nebula { + +// We don't need to find the closest points. We just need to find the first point pair whose +// distance is less than or less equal than the given distance. (Early quit) +bool dWithin(const Geography& a, const Geography& b, double distance, bool inclusive) { + auto aRegion = a.asS2(); + auto bRegion = b.asS2(); + + switch (a.shape()) { + case ShapeType::Point: { + const S2Point& aPoint = static_cast(aRegion)->point(); + switch (b.shape()) { + case ShapeType::Point: { + const S2Point& bPoint = static_cast(bRegion)->point(); + double closestDistance = S2Earth::GetDistanceMeters(aPoint, bPoint); + return inclusive ? 
closestDistance <= distance : closestDistance < distance; + } + case ShapeType::LineString: { + S2Polyline* bLine = static_cast(bRegion); + return s2PointAndS2PolylineAreWithinDistance(aPoint, bLine, distance, inclusive); + } + case ShapeType::Polygon: { + S2Polygon* bPolygon = static_cast(bRegion); + return s2PointAndS2PolygonAreWithinDistance(aPoint, bPolygon, distance, inclusive); + } + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + case ShapeType::LineString: { + S2Polyline* aLine = static_cast(aRegion); + switch (b.shape()) { + case ShapeType::Point: { + const S2Point& bPoint = static_cast(bRegion)->point(); + return s2PointAndS2PolylineAreWithinDistance(bPoint, aLine, distance, inclusive); + } + case ShapeType::LineString: { + S2Polyline* bLine = static_cast(bRegion); + MutableS2ShapeIndex aIndex, bIndex; + aIndex.Add(std::make_unique(aLine)); + bIndex.Add(std::make_unique(bLine)); + S2ClosestEdgeQuery query(&aIndex); + S2ClosestEdgeQuery::ShapeIndexTarget target(&bIndex); + if (inclusive) { + return query.IsDistanceLessOrEqual( + &target, S2Earth::ToChordAngle(util::units::Meters(distance))); + } + return query.IsDistanceLess(&target, + S2Earth::ToChordAngle(util::units::Meters(distance))); + } + case ShapeType::Polygon: { + S2Polygon* bPolygon = static_cast(bRegion); + return s2PolylineAndS2PolygonAreWithinDistance(aLine, bPolygon, distance, inclusive); + } + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + case ShapeType::Polygon: { + S2Polygon* aPolygon = static_cast(aRegion); + switch (b.shape()) { + case ShapeType::Point: { + const S2Point& bPoint = static_cast(bRegion)->point(); + return s2PointAndS2PolygonAreWithinDistance(bPoint, aPolygon, distance, inclusive); + } + case ShapeType::LineString: { + S2Polyline* bLine = static_cast(bRegion); + return 
s2PolylineAndS2PolygonAreWithinDistance(bLine, aPolygon, distance, inclusive); + } + case ShapeType::Polygon: { + S2Polygon* bPolygon = static_cast(bRegion); + S2ClosestEdgeQuery query(&aPolygon->index()); + S2ClosestEdgeQuery::ShapeIndexTarget target(&bPolygon->index()); + if (inclusive) { + return query.IsDistanceLessOrEqual( + &target, S2Earth::ToChordAngle(util::units::Meters(distance))); + } + return query.IsDistanceLess(&target, + S2Earth::ToChordAngle(util::units::Meters(distance))); + } + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + } + + return false; +} + +bool s2PointAndS2PolylineAreWithinDistance(const S2Point& aPoint, + const S2Polyline* bLine, + double distance, + bool inclusive) { + MutableS2ShapeIndex bIndex; + bIndex.Add(std::make_unique(bLine)); + S2ClosestEdgeQuery query(&bIndex); + S2ClosestEdgeQuery::PointTarget target(aPoint); + if (inclusive) { + return query.IsDistanceLessOrEqual(&target, + S2Earth::ToChordAngle(util::units::Meters(distance))); + } else { + return query.IsDistanceLess(&target, S2Earth::ToChordAngle(util::units::Meters(distance))); + } +} + +bool s2PointAndS2PolygonAreWithinDistance(const S2Point& aPoint, + const S2Polygon* bPolygon, + double distance, + bool inclusive) { + S2ClosestEdgeQuery query(&bPolygon->index()); + S2ClosestEdgeQuery::PointTarget target(aPoint); + if (inclusive) { + return query.IsDistanceLessOrEqual(&target, + S2Earth::ToChordAngle(util::units::Meters(distance))); + } else { + return query.IsDistanceLess(&target, S2Earth::ToChordAngle(util::units::Meters(distance))); + } +} + +bool s2PolylineAndS2PolygonAreWithinDistance(const S2Polyline* aLine, + const S2Polygon* bPolygon, + double distance, + bool inclusive) { + MutableS2ShapeIndex aIndex; + aIndex.Add(std::make_unique(aLine)); + S2ClosestEdgeQuery::ShapeIndexTarget target(&aIndex); + S2ClosestEdgeQuery query(&bPolygon->index()); + if (inclusive) { + return 
query.IsDistanceLessOrEqual(&target, + S2Earth::ToChordAngle(util::units::Meters(distance))); + } else { + return query.IsDistanceLess(&target, S2Earth::ToChordAngle(util::units::Meters(distance))); + } +} + +} // namespace nebula diff --git a/src/common/geo/function/DWithin.h b/src/common/geo/function/DWithin.h new file mode 100644 index 00000000000..7a12d916c6d --- /dev/null +++ b/src/common/geo/function/DWithin.h @@ -0,0 +1,32 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#pragma once + +#include "common/datatypes/Geography.h" + +namespace nebula { + +// We don't need to find the closest points. We just need to find the first point pair whose +// distance is less than or less equal than the given distance. (Early quit) +bool dWithin(const Geography& a, const Geography& b, double distance, bool inclusive); + +bool s2PointAndS2PolylineAreWithinDistance(const S2Point& aPoint, + const S2Polyline* bLine, + double distance, + bool inclusive); + +bool s2PointAndS2PolygonAreWithinDistance(const S2Point& aPoint, + const S2Polygon* bPolygon, + double distance, + bool inclusive); + +bool s2PolylineAndS2PolygonAreWithinDistance(const S2Polyline* aLine, + const S2Polygon* bPolygon, + double distance, + bool inclusive); + +} // namespace nebula diff --git a/src/common/geo/function/Distance.cpp b/src/common/geo/function/Distance.cpp new file mode 100644 index 00000000000..b9cc267635f --- /dev/null +++ b/src/common/geo/function/Distance.cpp @@ -0,0 +1,115 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */ + +#include "common/geo/function/Distance.h" + +#include +#include +#include +#include + +namespace nebula { + +// Find the closest distance of a and b +double distance(const Geography& a, const Geography& b) { + auto aRegion = a.asS2(); + auto bRegion = b.asS2(); + + switch (a.shape()) { + case ShapeType::Point: { + const S2Point& aPoint = static_cast(aRegion)->point(); + switch (b.shape()) { + case ShapeType::Point: { + const S2Point& bPoint = static_cast(bRegion)->point(); + return S2Earth::GetDistanceMeters(aPoint, bPoint); + } + case ShapeType::LineString: { + S2Polyline* bLine = static_cast(bRegion); + return distanceOfS2PolylineWithS2Point(bLine, aPoint); + } + case ShapeType::Polygon: { + S2Polygon* bPolygon = static_cast(bRegion); + return distanceOfS2PolygonWithS2Point(bPolygon, aPoint); + } + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + case ShapeType::LineString: { + S2Polyline* aLine = static_cast(aRegion); + switch (b.shape()) { + case ShapeType::Point: { + const S2Point& bPoint = static_cast(bRegion)->point(); + return distanceOfS2PolylineWithS2Point(aLine, bPoint); + } + case ShapeType::LineString: { + const S2Polyline* bLine = static_cast(bRegion); + MutableS2ShapeIndex aIndex, bIndex; + aIndex.Add(std::make_unique(aLine)); + bIndex.Add(std::make_unique(bLine)); + S2ClosestEdgeQuery query(&aIndex); + S2ClosestEdgeQuery::ShapeIndexTarget target(&bIndex); + return S2Earth::ToMeters(query.GetDistance(&target)); + } + case ShapeType::Polygon: { + S2Polygon* bPolygon = static_cast(bRegion); + return distanceOfS2PolygonWithS2Polyline(bPolygon, aLine); + } + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + case ShapeType::Polygon: { + S2Polygon* aPolygon = static_cast(aRegion); + switch (b.shape()) { + case ShapeType::Point: { + const S2Point& bPoint = 
static_cast(bRegion)->point(); + return distanceOfS2PolygonWithS2Point(aPolygon, bPoint); + } + case ShapeType::LineString: { + S2Polyline* bLine = static_cast(bRegion); + return distanceOfS2PolygonWithS2Polyline(aPolygon, bLine); + } + case ShapeType::Polygon: { + S2Polygon* bPolygon = static_cast(bRegion); + S2ClosestEdgeQuery query(&aPolygon->index()); + S2ClosestEdgeQuery::ShapeIndexTarget target(&bPolygon->index()); + return S2Earth::ToMeters(query.GetDistance(&target)); + } + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + } + + return false; +} + +double distanceOfS2PolylineWithS2Point(const S2Polyline* aLine, const S2Point& bPoint) { + int tmp; + S2Point cloestPointOnLine = aLine->Project(bPoint, &tmp); + return S2Earth::GetDistanceMeters(cloestPointOnLine, bPoint); +} + +double distanceOfS2PolygonWithS2Polyline(const S2Polygon* aPolygon, const S2Polyline* bLine) { + S2ClosestEdgeQuery query(&aPolygon->index()); + MutableS2ShapeIndex bIndex; + bIndex.Add(std::make_unique(bLine)); + S2ClosestEdgeQuery::ShapeIndexTarget target(&bIndex); + return S2Earth::ToMeters(query.GetDistance(&target)); +} + +double distanceOfS2PolygonWithS2Point(const S2Polygon* aPolygon, const S2Point& bPoint) { + return S2Earth::ToMeters(aPolygon->GetDistance(bPoint)); +} + +} // namespace nebula diff --git a/src/common/geo/function/Distance.h b/src/common/geo/function/Distance.h new file mode 100644 index 00000000000..0f247a63ad2 --- /dev/null +++ b/src/common/geo/function/Distance.h @@ -0,0 +1,22 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. 
+ */ + +#pragma once + +#include "common/datatypes/Geography.h" + +namespace nebula { + +// Find the closest distance of a and b +double distance(const Geography& a, const Geography& b); + +double distanceOfS2PolylineWithS2Point(const S2Polyline* aLine, const S2Point& bPoint); + +double distanceOfS2PolygonWithS2Polyline(const S2Polygon* aPolygon, const S2Polyline* bLine); + +double distanceOfS2PolygonWithS2Point(const S2Polygon* aPolygon, const S2Point& bPoint); + +} // namespace nebula diff --git a/src/common/geo/function/Intersects.cpp b/src/common/geo/function/Intersects.cpp new file mode 100644 index 00000000000..7e6cec71a9f --- /dev/null +++ b/src/common/geo/function/Intersects.cpp @@ -0,0 +1,74 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#include "common/geo/function/Intersects.h" + +#include +#include + +namespace nebula { + +// Intersects returns whether geography a intersects geography b. 
+// If any point in the set that comprises A is also a member of the set of points that make up B, +// they intersects; +bool intersects(const Geography& a, const Geography& b) { + auto aRegion = a.asS2(); + auto bRegion = b.asS2(); + + switch (a.shape()) { + case ShapeType::Point: { + switch (b.shape()) { + case ShapeType::Point: + return static_cast(aRegion)->MayIntersect( + S2Cell(static_cast(bRegion)->point())); + case ShapeType::LineString: + return static_cast(bRegion)->MayIntersect( + S2Cell(static_cast(aRegion)->point())); + case ShapeType::Polygon: + return static_cast(bRegion)->MayIntersect( + S2Cell(static_cast(aRegion)->point())); + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + case ShapeType::LineString: { + switch (b.shape()) { + case ShapeType::Point: + return static_cast(aRegion)->MayIntersect( + S2Cell(static_cast(bRegion)->point())); + case ShapeType::LineString: + return static_cast(aRegion)->Intersects(static_cast(bRegion)); + case ShapeType::Polygon: + return static_cast(bRegion)->Intersects(*static_cast(aRegion)); + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + case ShapeType::Polygon: { + switch (b.shape()) { + case ShapeType::Point: + return static_cast(aRegion)->MayIntersect( + S2Cell(static_cast(bRegion)->point())); + case ShapeType::LineString: + return static_cast(aRegion)->Intersects(*static_cast(bRegion)); + case ShapeType::Polygon: + return static_cast(aRegion)->Intersects(static_cast(bRegion)); + default: + LOG(FATAL) + << "Geography shapes other than Point/LineString/Polygon are not currently supported"; + return -1.0; + } + } + } + + return false; +} + +} // namespace nebula diff --git a/src/common/geo/function/Intersects.h b/src/common/geo/function/Intersects.h new file mode 100644 index 00000000000..f41380d5ac7 --- /dev/null +++ 
b/src/common/geo/function/Intersects.h @@ -0,0 +1,18 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +#pragma once + +#include "common/datatypes/Geography.h" + +namespace nebula { + +// Intersects returns whether geography a intersects geography b. +// If any point in the set that comprises A is also a member of the set of points that make up B, +// they intersect. +bool intersects(const Geography& a, const Geography& b); + +} // namespace nebula diff --git a/src/common/geo/wkb/WKBReader.h b/src/common/geo/wkb/WKBReader.h new file mode 100644 index 00000000000..3a47fd8cd8d --- /dev/null +++ b/src/common/geo/wkb/WKBReader.h @@ -0,0 +1,7 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ + +s2_region* parseS2RegionFromWkb(const std::string& wkb) {} diff --git a/src/common/geo/wkb/WKBWriter.h b/src/common/geo/wkb/WKBWriter.h new file mode 100644 index 00000000000..2ab9cc62b19 --- /dev/null +++ b/src/common/geo/wkb/WKBWriter.h @@ -0,0 +1,5 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ diff --git a/src/common/geo/wkt/WKTReader.h b/src/common/geo/wkt/WKTReader.h new file mode 100644 index 00000000000..2ab9cc62b19 --- /dev/null +++ b/src/common/geo/wkt/WKTReader.h @@ -0,0 +1,5 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory.
+ */ diff --git a/src/common/geo/wkt/WKTWriter.h b/src/common/geo/wkt/WKTWriter.h new file mode 100644 index 00000000000..2ab9cc62b19 --- /dev/null +++ b/src/common/geo/wkt/WKTWriter.h @@ -0,0 +1,5 @@ +/* Copyright (c) 2020 vesoft inc. All rights reserved. + * + * This source code is licensed under Apache 2.0 License, + * attached with Common Clause Condition 1.0, found in the LICENSES directory. + */ diff --git a/src/common/meta/NebulaSchemaProvider.cpp b/src/common/meta/NebulaSchemaProvider.cpp index 72a72402a2b..dc5aab4a2c5 100644 --- a/src/common/meta/NebulaSchemaProvider.cpp +++ b/src/common/meta/NebulaSchemaProvider.cpp @@ -160,6 +160,8 @@ std::size_t NebulaSchemaProvider::fieldSize(cpp2::PropertyType type, std::size_t sizeof(int8_t) + // minute sizeof(int8_t) + // sec sizeof(int32_t); // microsec + case cpp2::PropertyType::GEOGRAPHY: + return 8; // as same as STRING case cpp2::PropertyType::UNKNOWN: break; } diff --git a/src/common/thread/GenericWorker.h b/src/common/thread/GenericWorker.h index 5d10ba92cad..83e3de063a8 100644 --- a/src/common/thread/GenericWorker.h +++ b/src/common/thread/GenericWorker.h @@ -52,7 +52,7 @@ class GenericWorker final : public nebula::cpp::NonCopyable, public nebula::cpp: * A GenericWorker MUST be `start'ed successfully before invoking * any other interfaces. */ - bool MUST_USE_RESULT start(std::string name = ""); + bool NG_MUST_USE_RESULT start(std::string name = ""); /** * Asynchronouly to notify the worker to stop handling further new tasks. 
diff --git a/src/common/time/TimezoneInfo.h b/src/common/time/TimezoneInfo.h index 34795c5f5c4..221dba3c3fa 100644 --- a/src/common/time/TimezoneInfo.h +++ b/src/common/time/TimezoneInfo.h @@ -24,7 +24,7 @@ class Timezone { public: Timezone() = default; - static MUST_USE_RESULT Status init() { + static NG_MUST_USE_RESULT Status init() { try { tzdb.load_from_file(FLAGS_timezone_file); } catch (const std::exception &e) { @@ -34,7 +34,7 @@ class Timezone { return Status::OK(); } - MUST_USE_RESULT Status loadFromDb(const std::string ®ion) { + NG_MUST_USE_RESULT Status loadFromDb(const std::string ®ion) { zoneInfo_ = tzdb.time_zone_from_region(region); if (zoneInfo_ == nullptr) { return Status::Error("Not supported timezone `%s'.", region.c_str()); @@ -44,7 +44,7 @@ class Timezone { // see the posix timezone literal format in // https://man7.org/linux/man-pages/man3/tzset.3.html - MUST_USE_RESULT Status parsePosixTimezone(const std::string &posixTimezone) { + NG_MUST_USE_RESULT Status parsePosixTimezone(const std::string &posixTimezone) { try { zoneInfo_.reset(new ::boost::local_time::posix_time_zone(posixTimezone)); } catch (const std::exception &e) { diff --git a/src/common/utils/IndexKeyUtils.cpp b/src/common/utils/IndexKeyUtils.cpp index 864e6c7c99c..342beb3edd9 100644 --- a/src/common/utils/IndexKeyUtils.cpp +++ b/src/common/utils/IndexKeyUtils.cpp @@ -26,7 +26,8 @@ std::string IndexKeyUtils::encodeValues(std::vector&& values, if (!values[i].isNull()) { // string index need to fill with '\0' if length is less than schema - if (cols[i].type.type == meta::cpp2::PropertyType::FIXED_STRING) { + if (cols[i].type.type == + meta::cpp2::PropertyType::FIXED_STRING) { // TODO(jie) FIXED_STRING or STRING? 
auto len = static_cast(*cols[i].type.get_type_length()); index.append(encodeValue(values[i], len)); } else { diff --git a/src/common/utils/IndexKeyUtils.h b/src/common/utils/IndexKeyUtils.h index 71d0b619ccf..82947da3f97 100644 --- a/src/common/utils/IndexKeyUtils.h +++ b/src/common/utils/IndexKeyUtils.h @@ -10,6 +10,7 @@ #include "codec/RowReader.h" #include "common/base/Base.h" #include "common/base/StatusOr.h" +#include "common/geo/GeoIndex.h" #include "common/utils/Types.h" #include "interface/gen-cpp2/meta_types.h" @@ -48,6 +49,8 @@ class IndexKeyUtils final { return Value::Type::TIME; case PropertyType::DATETIME: return Value::Type::DATETIME; + case PropertyType::GEOGRAPHY: + return Value::Type::GEOGRAPHY; case PropertyType::UNKNOWN: return Value::Type::__EMPTY__; } @@ -85,6 +88,10 @@ class IndexKeyUtils final { len = sizeof(int32_t) + sizeof(int16_t) + sizeof(int8_t) * 5; break; } + case Value::Type::GEOGRAPHY: { + len = sizeof(uint64_t); // S2CellId + break; + } default: LOG(ERROR) << "Unsupported default value type"; } @@ -131,6 +138,9 @@ class IndexKeyUtils final { case Value::Type::DATETIME: { return encodeDateTime(v.getDateTime()); } + case Value::Type::GEOGRAPHY: { + return encodeGeography(v.getGeography()); + } default: LOG(ERROR) << "Unsupported default value type"; } @@ -164,6 +174,10 @@ class IndexKeyUtils final { return val; } + static std::string encodeUint64(uint64_t v) { + return {reinterpret_cast(&v), sizeof(uint64_t)}; + } + static std::string encodeRank(EdgeRanking rank) { return IndexKeyUtils::encodeInt64(rank); } static EdgeRanking decodeRank(const folly::StringPiece& raw) { @@ -277,6 +291,19 @@ class IndexKeyUtils final { return buf; } + static std::string encodeGeography(const nebula::Geography& gg) { + // get params from index meta + RegionCoverParams rc; + GeographyIndex geogIndex(rc, false); // get schema meta to know if it's point only + auto cellIds = geogIndex.indexCells(gg); + std::vector bufs; + for (auto cellId : cellIds) { + 
bufs.emplace_back(encodeUint64(cellId)); + } + + return bufs[0]; // just support index point here. + } + static nebula::DateTime decodeDateTime(const folly::StringPiece& raw) { int16_t year = *reinterpret_cast(raw.data()); int8_t month = *reinterpret_cast(raw.data() + sizeof(int16_t)); @@ -332,6 +359,10 @@ class IndexKeyUtils final { v.setDateTime(decodeDateTime(raw)); break; } + case Value::Type::GEOGRAPHY: { + // unable to get geography value from index key + return Value::kNullBadData; + } default: return Value(NullType::BAD_DATA); } @@ -396,6 +427,11 @@ class IndexKeyUtils final { len = sizeof(int32_t) + sizeof(int16_t) + sizeof(int8_t) * 5; break; } + case Value::Type::GEOGRAPHY: { + // LOG(FATAL) << "unable to get geography value from index key" + len = sizeof(uint64_t); + break; + } default: len = 0; } diff --git a/src/common/utils/test/CMakeLists.txt b/src/common/utils/test/CMakeLists.txt index 0f58aa14fd8..879e163f1f8 100644 --- a/src/common/utils/test/CMakeLists.txt +++ b/src/common/utils/test/CMakeLists.txt @@ -5,6 +5,7 @@ nebula_add_test( NebulaKeyUtilsTest.cpp OBJECTS $ + $ $ $ $ @@ -22,6 +23,7 @@ nebula_add_test( IndexKeyUtilsTest.cpp OBJECTS $ + $ $ $ $ @@ -39,6 +41,7 @@ nebula_add_test( OperationKeyUtilsTest.cpp OBJECTS $ + $ $ $ $ diff --git a/src/daemons/CMakeLists.txt b/src/daemons/CMakeLists.txt index ce7ec27cb95..1479aff1bcf 100644 --- a/src/daemons/CMakeLists.txt +++ b/src/daemons/CMakeLists.txt @@ -37,6 +37,7 @@ set(storage_meta_deps $ $ $ + $ $ $ $ diff --git a/src/graph/optimizer/OptimizerUtils.cpp b/src/graph/optimizer/OptimizerUtils.cpp index 53f7e89c240..2fb82011b0c 100644 --- a/src/graph/optimizer/OptimizerUtils.cpp +++ b/src/graph/optimizer/OptimizerUtils.cpp @@ -190,6 +190,7 @@ Value OptimizerUtils::boundValueWithGT(const meta::cpp2::ColumnDef& col, const V case Value::Type::SET: case Value::Type::MAP: case Value::Type::DATASET: + case Value::Type::GEOGRAPHY: // TODO(jie) case Value::Type::PATH: { DLOG(FATAL) << "Not supported value 
type " << type << "for index."; return Value::kNullBadType; @@ -336,6 +337,7 @@ Value OptimizerUtils::boundValueWithLT(const meta::cpp2::ColumnDef& col, const V case Value::Type::SET: case Value::Type::MAP: case Value::Type::DATASET: + case Value::Type::GEOGRAPHY: // TODO(jie) case Value::Type::PATH: { DLOG(FATAL) << "Not supported value type " << type << "for index."; return Value::kNullBadType; @@ -395,6 +397,7 @@ Value OptimizerUtils::boundValueWithMax(const meta::cpp2::ColumnDef& col) { case Value::Type::SET: case Value::Type::MAP: case Value::Type::DATASET: + case Value::Type::GEOGRAPHY: // TODO(jie) case Value::Type::PATH: { DLOG(FATAL) << "Not supported value type " << type << "for index."; return Value::kNullBadType; @@ -437,6 +440,7 @@ Value OptimizerUtils::boundValueWithMin(const meta::cpp2::ColumnDef& col) { case Value::Type::SET: case Value::Type::MAP: case Value::Type::DATASET: + case Value::Type::GEOGRAPHY: // TODO(jie) case Value::Type::PATH: { DLOG(FATAL) << "Not supported value type " << type << "for index."; return Value::kNullBadType; @@ -482,6 +486,7 @@ Value OptimizerUtils::normalizeValue(const meta::cpp2::ColumnDef& col, const Val case Value::Type::SET: case Value::Type::MAP: case Value::Type::DATASET: + case Value::Type::GEOGRAPHY: // TODO(jie) case Value::Type::PATH: { DLOG(FATAL) << "Not supported value type " << type << "for index."; return Value::kNullBadType; diff --git a/src/graph/service/Authenticator.h b/src/graph/service/Authenticator.h index 7c4d90439bc..ea390b8a148 100644 --- a/src/graph/service/Authenticator.h +++ b/src/graph/service/Authenticator.h @@ -16,7 +16,7 @@ class Authenticator { public: virtual ~Authenticator() {} - virtual bool MUST_USE_RESULT auth(const std::string &user, const std::string &password) = 0; + virtual bool NG_MUST_USE_RESULT auth(const std::string &user, const std::string &password) = 0; }; } // namespace graph diff --git a/src/graph/service/GraphService.h b/src/graph/service/GraphService.h index 
f72c36bb39e..76dc0f811b1 100644 --- a/src/graph/service/GraphService.h +++ b/src/graph/service/GraphService.h @@ -25,8 +25,8 @@ class GraphService final : public cpp2::GraphServiceSvIf { GraphService() = default; ~GraphService() = default; - Status MUST_USE_RESULT init(std::shared_ptr ioExecutor, - const HostAddr& hostAddr); + Status NG_MUST_USE_RESULT init(std::shared_ptr ioExecutor, + const HostAddr& hostAddr); folly::Future future_authenticate(const std::string& username, const std::string& password) override; diff --git a/src/graph/util/SchemaUtil.cpp b/src/graph/util/SchemaUtil.cpp index d24c604229e..ccbcb9ca028 100644 --- a/src/graph/util/SchemaUtil.cpp +++ b/src/graph/util/SchemaUtil.cpp @@ -294,6 +294,8 @@ Value::Type SchemaUtil::propTypeToValueType(meta::cpp2::PropertyType propType) { return Value::Type::DATE; case meta::cpp2::PropertyType::DATETIME: return Value::Type::DATETIME; + case meta::cpp2::PropertyType::GEOGRAPHY: + return Value::Type::GEOGRAPHY; case meta::cpp2::PropertyType::UNKNOWN: return Value::Type::__EMPTY__; } diff --git a/src/graph/validator/Validator.h b/src/graph/validator/Validator.h index 69e8bcbf8d1..c01fd59451f 100644 --- a/src/graph/validator/Validator.h +++ b/src/graph/validator/Validator.h @@ -32,7 +32,7 @@ class Validator { Status validate(); - MUST_USE_RESULT Status appendPlan(PlanNode* tail); + NG_MUST_USE_RESULT Status appendPlan(PlanNode* tail); void setInputVarName(std::string name) { inputVarName_ = std::move(name); } diff --git a/src/interface/common.thrift b/src/interface/common.thrift index 6f05fb32fbf..9143409e214 100644 --- a/src/interface/common.thrift +++ b/src/interface/common.thrift @@ -25,6 +25,7 @@ cpp_include "common/datatypes/SetOps-inl.h" cpp_include "common/datatypes/DataSetOps-inl.h" cpp_include "common/datatypes/KeyValueOps-inl.h" cpp_include "common/datatypes/HostAddrOps-inl.h" +cpp_include "common/datatypes/GeographyOps-inl.h" /* * @@ -85,7 +86,6 @@ struct DateTime { 7: i32 microsec; // Micro-second: 0 - 
999,999 } (cpp.type = "nebula::DateTime") - enum NullType { __NULL__ = 0, NaN = 1, @@ -115,6 +115,7 @@ union Value { 13: NMap (cpp.type = "nebula::Map") mVal (cpp.ref_type = "unique"); 14: NSet (cpp.type = "nebula::Set") uVal (cpp.ref_type = "unique"); 15: DataSet (cpp.type = "nebula::DataSet") gVal (cpp.ref_type = "unique"); + 16: Geography (cpp.type = "nebula::Geography") ggVal (cpp.ref_type = "unique"); } (cpp.type = "nebula::Value") @@ -146,6 +147,10 @@ struct DataSet { 2: list rows; } (cpp.type = "nebula::DataSet") +struct Geography { + 1: string wkb; +} (cpp.type = "nebula::Geography") + struct Tag { 1: binary name, diff --git a/src/interface/meta.thrift b/src/interface/meta.thrift index 922736f0f30..44eaa9b312e 100644 --- a/src/interface/meta.thrift +++ b/src/interface/meta.thrift @@ -60,6 +60,15 @@ union ID { } +// Geo shape type +enum GeoShape { + ANY = 0, + POINT = 1, + LINESTRING = 2, + POLYGON = 3, +} (cpp.enum_strict) + + // These are all data types supported in the graph properties enum PropertyType { UNKNOWN = 0, @@ -83,12 +92,17 @@ enum PropertyType { DATE = 24, DATETIME = 25, TIME = 26, + + // Geo spatial + GEOGRAPHY = 31, } (cpp.enum_strict) struct ColumnTypeDef { 1: required PropertyType type, // type_length is valid for fixed_string type 2: optional i16 type_length = 0, + // geo_shape is valid for geography type + 3: optional GeoShape geo_shape, } struct ColumnDef { diff --git a/src/kvstore/test/CMakeLists.txt b/src/kvstore/test/CMakeLists.txt index 17d043ba6c2..d4c6ca7bf65 100644 --- a/src/kvstore/test/CMakeLists.txt +++ b/src/kvstore/test/CMakeLists.txt @@ -4,6 +4,7 @@ set(KVSTORE_TEST_LIBS $ $ $ + $ $ $ $ @@ -199,4 +200,4 @@ nebula_add_test( ${PROXYGEN_LIBRARIES} wangle gtest -) \ No newline at end of file +) diff --git a/src/meta/CMakeLists.txt b/src/meta/CMakeLists.txt index 63e2d75dde1..6f541f4755e 100644 --- a/src/meta/CMakeLists.txt +++ b/src/meta/CMakeLists.txt @@ -130,6 +130,7 @@ set(meta_test_deps $ $ $ + $ $ $ $ diff --git 
a/src/parser/parser.yy b/src/parser/parser.yy index 54888f2fec5..2462e7847b9 100644 --- a/src/parser/parser.yy +++ b/src/parser/parser.yy @@ -60,6 +60,7 @@ static constexpr size_t kCommentLengthLimit = 256; int64_t intval; double doubleval; std::string *strval; + nebula::meta::cpp2::GeoShape geo_shape; nebula::meta::cpp2::ColumnTypeDef *type; nebula::Expression *expr; nebula::Sentence *sentence; @@ -154,7 +155,7 @@ static constexpr size_t kCommentLengthLimit = 256; // Expression related memory will be managed by object pool %destructor {} %destructor {} -%destructor {} +%destructor {} %destructor { delete $$; } <*> /* keywords */ @@ -199,6 +200,7 @@ static constexpr size_t kCommentLengthLimit = 256; %token KW_REDUCE %token KW_SESSIONS KW_SESSION %token KW_KILL KW_QUERY KW_QUERIES KW_TOP +%token KW_GEOGRAPHY KW_POINT KW_LINESTRING KW_POLYGON /* symbols */ %token L_PAREN R_PAREN L_BRACKET R_BRACKET L_BRACE R_BRACE COMMA @@ -241,6 +243,7 @@ static constexpr size_t kCommentLengthLimit = 256; %type constant_expression %type query_unique_identifier_value %type argument_list opt_argument_list +%type geo_shape_type %type type_spec %type step_clause %type from_clause @@ -524,6 +527,9 @@ unreserved_keyword | KW_QUERY { $$ = new std::string("query"); } | KW_KILL { $$ = new std::string("kill"); } | KW_TOP { $$ = new std::string("top"); } + | KW_POINT { $$ = new std::string("point"); } + | KW_LINESTRING { $$ = new std::string("linestring"); } + | KW_POLYGON { $$ = new std::string("polygon"); } ; expression @@ -1062,6 +1068,18 @@ argument_list } ; +geo_shape_type + : KW_POINT { + $$ = meta::cpp2::GeoShape::POINT; + } + | KW_LINESTRING { + $$ = meta::cpp2::GeoShape::LINESTRING; + } + | KW_POLYGON { + $$ = meta::cpp2::GeoShape::POLYGON; + } + ; + type_spec : KW_BOOL { $$ = new meta::cpp2::ColumnTypeDef(); @@ -1123,6 +1141,16 @@ type_spec $$ = new meta::cpp2::ColumnTypeDef(); $$->set_type(meta::cpp2::PropertyType::DATETIME); } + | KW_GEOGRAPHY { + $$ = new 
meta::cpp2::ColumnTypeDef(); + $$->set_type(meta::cpp2::PropertyType::GEOGRAPHY); + $$->set_geo_shape(meta::cpp2::GeoShape::ANY); + } + | KW_GEOGRAPHY L_PAREN geo_shape_type R_PAREN { + $$ = new meta::cpp2::ColumnTypeDef(); + $$->set_type(meta::cpp2::PropertyType::GEOGRAPHY); + $$->set_geo_shape($3); + } ; diff --git a/src/parser/scanner.lex b/src/parser/scanner.lex index 5fc535a20cb..6c316e4b2bf 100644 --- a/src/parser/scanner.lex +++ b/src/parser/scanner.lex @@ -255,6 +255,10 @@ IP_OCTET ([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]) "QUERY" { return TokenType::KW_QUERY; } "KILL" { return TokenType::KW_KILL; } "TOP" { return TokenType::KW_TOP; } +"GEOGRAPHY" { return TokenType::KW_GEOGRAPHY; } +"POINT" { return TokenType::KW_POINT; } +"LINESTRING" { return TokenType::KW_LINESTRING; } +"POLYGON" { return TokenType::KW_POLYGON; } "TRUE" { yylval->boolval = true; return TokenType::BOOL; } "FALSE" { yylval->boolval = false; return TokenType::BOOL; } diff --git a/src/storage/index/LookupBaseProcessor-inl.h b/src/storage/index/LookupBaseProcessor-inl.h index a01fb095497..e15ecd2646e 100644 --- a/src/storage/index/LookupBaseProcessor-inl.h +++ b/src/storage/index/LookupBaseProcessor-inl.h @@ -207,7 +207,10 @@ StatusOr> LookupBaseProcessor::buildPlan( auto it = std::find_if(fields.begin(), fields.end(), [&yieldCol](const auto& columnDef) { return yieldCol == columnDef.get_name(); }); - if (it == fields.end()) { + if (it == fields.end() || + it->get_type().get_type() == + nebula::meta::cpp2::PropertyType::GEOGRAPHY) { // geography index just stores cellId, + // so must need read data here. 
needData = true; break; } diff --git a/src/storage/test/CMakeLists.txt b/src/storage/test/CMakeLists.txt index 1ee64ef9607..41856f29336 100644 --- a/src/storage/test/CMakeLists.txt +++ b/src/storage/test/CMakeLists.txt @@ -17,6 +17,7 @@ set(storage_test_deps $ $ $ + $ $ $ $ diff --git a/src/tools/db-dump/CMakeLists.txt b/src/tools/db-dump/CMakeLists.txt index ae25df40ea2..49f681905d7 100644 --- a/src/tools/db-dump/CMakeLists.txt +++ b/src/tools/db-dump/CMakeLists.txt @@ -15,6 +15,7 @@ set(tools_test_deps $ $ $ + $ $ $ $ diff --git a/src/tools/db-upgrade/CMakeLists.txt b/src/tools/db-upgrade/CMakeLists.txt index 0259465280e..b165b449523 100644 --- a/src/tools/db-upgrade/CMakeLists.txt +++ b/src/tools/db-upgrade/CMakeLists.txt @@ -23,6 +23,7 @@ nebula_add_executable( $ $ $ + $ $ $ $ diff --git a/src/tools/meta-dump/CMakeLists.txt b/src/tools/meta-dump/CMakeLists.txt index b715015e61f..7ccf0499dad 100644 --- a/src/tools/meta-dump/CMakeLists.txt +++ b/src/tools/meta-dump/CMakeLists.txt @@ -20,6 +20,7 @@ nebula_add_executable( $ $ $ + $ $ $ $ diff --git a/src/tools/simple-kv-verify/CMakeLists.txt b/src/tools/simple-kv-verify/CMakeLists.txt index 9d988f9cd18..034adb75b89 100644 --- a/src/tools/simple-kv-verify/CMakeLists.txt +++ b/src/tools/simple-kv-verify/CMakeLists.txt @@ -17,6 +17,7 @@ nebula_add_executable( $ $ $ + $ $ $ $ diff --git a/src/tools/storage-perf/CMakeLists.txt b/src/tools/storage-perf/CMakeLists.txt index ee298ac6737..4b4baa3895d 100644 --- a/src/tools/storage-perf/CMakeLists.txt +++ b/src/tools/storage-perf/CMakeLists.txt @@ -13,6 +13,7 @@ set(perf_test_deps $ $ $ + $ $ $ $ diff --git a/src/webservice/WebService.h b/src/webservice/WebService.h index 518ac61a0f7..e4a373689f6 100644 --- a/src/webservice/WebService.h +++ b/src/webservice/WebService.h @@ -33,7 +33,7 @@ class WebService final { explicit WebService(const std::string& name = ""); ~WebService(); - MUST_USE_RESULT web::Router& router() { + NG_MUST_USE_RESULT web::Router& router() { 
CHECK(!started_) << "Don't add routes after starting web server!"; return *router_; } @@ -42,7 +42,7 @@ class WebService final { // Two ports would be bound, one for HTTP, another one for HTTP2. // If FLAGS_ws_http_port or FLAGS_ws_h2_port is zero, an ephemeral port // would be assigned and set back to the gflag, respectively. - MUST_USE_RESULT Status start(); + NG_MUST_USE_RESULT Status start(); // Check whether web service is started bool started() const { return started_; }