Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialize r-tree index scan and plan #407

Merged
merged 4 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#pragma once
#include "duckdb/planner/operator/logical_extension_operator.hpp"
#include "spatial/common.hpp"
#include "duckdb/parser/parsed_data/create_index_info.hpp"

#include "spatial/common.hpp"
namespace spatial {

namespace core {
Expand All @@ -22,16 +23,61 @@ class LogicalCreateRTreeIndex final : public LogicalExtensionOperator {
TableCatalogEntry &table_p);
void ResolveTypes() override;
void ResolveColumnBindings(ColumnBindingResolver &res, vector<ColumnBinding> &bindings) override;
string GetExtensionName() const override;

// Actually create and plan the index creation
unique_ptr<PhysicalOperator> CreatePlan(ClientContext &context, PhysicalPlanGenerator &generator) override;

// Serializes this operator so a plan containing it can be written out and read back.
//
// The base LogicalExtensionOperator fields are written first, followed by:
//   300 "operator_type"       - tag string identifying this operator kind
//   400 "info"                - the CreateIndexInfo held by this operator
//   401 "unbound_expressions" - the unbound index expressions held by this operator
//
// Previously this method threw an InternalException before writing anything, which left
// all of the property writes below unreachable.
void Serialize(Serializer &writer) const override {
	LogicalExtensionOperator::Serialize(writer);
	writer.WritePropertyWithDefault(300, "operator_type", string("logical_rtree_create_index"));
	writer.WritePropertyWithDefault<unique_ptr<CreateIndexInfo>>(400, "info", info);
	writer.WritePropertyWithDefault<vector<unique_ptr<Expression>>>(401, "unbound_expressions", unbound_expressions);
}

// Reports the name of the extension that owns this logical operator.
string GetExtensionName() const override {
	return string("duckdb_spatial");
}
};

// Operator extension for the spatial extension's custom logical operators.
// It provides the Deserialize hook that rebuilds a LogicalCreateRTreeIndex from its
// serialized properties ("operator_type", "info", "unbound_expressions").
class LogicalCreateRTreeIndexOperatorExtension final : public OperatorExtension {
public:
	LogicalCreateRTreeIndexOperatorExtension() {
		Bind = [](ClientContext &, Binder &, OperatorExtensionInfo *, SQLStatement &) -> BoundStatement {
			// For some reason all operator extensions require this callback to be implemented
			// even though it is useless for us as we construct this operator through the optimizer instead.
			BoundStatement result;
			result.plan = nullptr;
			return result;
		};
	}

	// Name under which this operator extension is registered.
	std::string GetName() override {
		return "duckdb_spatial";
	}

	// Rebuilds a LogicalCreateRTreeIndex from serialized properties.
	//
	// Reads "operator_type" (300), "info" (400) and "unbound_expressions" (401), then looks the
	// target table up again in the catalog using the catalog/schema/table names stored in the
	// index info.
	//
	// Throws SerializationException if the operator type is not "logical_rtree_create_index",
	// or if the index info is absent or is not a create-index info.
	unique_ptr<LogicalExtensionOperator> Deserialize(Deserializer &reader) override {
		const auto operator_type = reader.ReadPropertyWithDefault<string>(300, "operator_type");
		// We only have one extension operator type right now
		if (operator_type != "logical_rtree_create_index") {
			throw SerializationException("This version of the spatial extension does not support operator type '%s'!",
			                             operator_type);
		}
		auto create_info = reader.ReadPropertyWithDefault<unique_ptr<CreateInfo>>(400, "info");
		auto unbound_expressions =
		    reader.ReadPropertyWithDefault<vector<unique_ptr<Expression>>>(401, "unbound_expressions");

		// "info" is read with a default, so it can come back empty; the cast below is unchecked,
		// so also make sure the payload really is a create-index info before down-casting.
		if (!create_info || create_info->type != CatalogType::INDEX_ENTRY) {
			throw SerializationException("Serialized RTree create index operator has missing or invalid index info");
		}
		auto info = unique_ptr_cast<CreateInfo, CreateIndexInfo>(std::move(create_info));

		// We also need to rebind the table
		auto &context = reader.Get<ClientContext &>();
		const auto &catalog = info->catalog;
		const auto &schema = info->schema;
		const auto &table_name = info->table;
		auto &table_entry = Catalog::GetEntry<TableCatalogEntry>(context, catalog, schema, table_name);

		// Return the new operator
		return make_uniq<LogicalCreateRTreeIndex>(std::move(info), std::move(unbound_expressions), table_entry);
	}
};


} // namespace core

} // namespace spatial
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ void LogicalCreateRTreeIndex::ResolveColumnBindings(ColumnBindingResolver &res,
[&](unique_ptr<Expression> *child) { res.VisitExpression(child); });
}

// Reports the extension name for this operator.
// NOTE(review): the class header shown in this same diff defines GetExtensionName() inline and
// returns "duckdb_spatial"; this out-of-line definition looks like the pre-change version and
// presumably should be dropped rather than kept alongside it - confirm.
string LogicalCreateRTreeIndex::GetExtensionName() const {
	return string("rtree_create_index");
}

static unique_ptr<PhysicalOperator> CreateNullFilter(const LogicalCreateRTreeIndex &op,
const vector<LogicalType> &types, ClientContext &context) {
vector<unique_ptr<Expression>> filter_select_list;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class RTreeIndexInsertionRewriter : public OptimizerExtension {
// Registers the RTree index-creation hooks on the database configuration:
// the RTreeIndexInsertionRewriter optimizer extension, and the operator extension
// for LogicalCreateRTreeIndex.
void RTreeModule::RegisterIndexPlanCreate(DatabaseInstance &db) {
	auto &config = db.config;

	// Optimizer extension first ...
	config.optimizer_extensions.push_back(RTreeIndexInsertionRewriter());

	// ... then the operator extension.
	auto operator_extension = make_uniq<LogicalCreateRTreeIndexOperatorExtension>();
	config.operator_extensions.push_back(std::move(operator_extension));
}

} // namespace core
Expand Down
64 changes: 64 additions & 0 deletions spatial/src/spatial/core/index/rtree/rtree_index_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,68 @@ static string RTreeIndexScanToString(const FunctionData *bind_data_p) {
return bind_data.table.name + " (RTREE INDEX SCAN : " + bind_data.index.GetIndexName() + ")";
}

//-------------------------------------------------------------------------
// De/Serialize
//-------------------------------------------------------------------------
// Writes the bind data of an RTree index scan to the serializer.
//
// Properties written:
//   100 "catalog", 101 "schema", 102 "table" - names identifying the scanned table
//   103 "index_name"                         - name of the index being scanned
//   104 "bbox"                               - nested object with the bounding box as floats:
//                                              10 "min_x", 11 "min_y", 20 "max_x", 21 "max_y"
//
// The `function` argument is not used.
static void RTreeScanSerialize(Serializer &serializer, const optional_ptr<FunctionData> bind_data_p,
                               const TableFunction &function) {
	auto &scan_data = bind_data_p->Cast<RTreeIndexScanBindData>();
	auto &table = scan_data.table;
	auto &bounds = scan_data.bbox;

	// Table identity
	serializer.WriteProperty(100, "catalog", table.schema.catalog.GetName());
	serializer.WriteProperty(101, "schema", table.schema.name);
	serializer.WriteProperty(102, "table", table.name);

	// Index identity
	serializer.WriteProperty(103, "index_name", scan_data.index.GetIndexName());

	// Bounding box, stored as a nested object
	serializer.WriteObject(104, "bbox", [&](Serializer &box_writer) {
		box_writer.WriteProperty<float>(10, "min_x", bounds.min.x);
		box_writer.WriteProperty<float>(11, "min_y", bounds.min.y);
		box_writer.WriteProperty<float>(20, "max_x", bounds.max.x);
		box_writer.WriteProperty<float>(21, "max_y", bounds.max.y);
	});
}

// Rebuilds the bind data of an RTree index scan from serialized properties.
//
// Reads the table identity (100 "catalog", 101 "schema", 102 "table"), looks the table up in
// the catalog, then reads 103 "index_name" and the 104 "bbox" object. The table's indexes are
// then scanned for an RTreeIndex with the stored name.
//
// Throws SerializationException if the catalog entry is not a table entry or if no RTree
// index with the stored name is found on the table. The `function` argument is not used.
static unique_ptr<FunctionData> RTreeScanDeserialize(Deserializer &deserializer, TableFunction &function) {
	auto &context = deserializer.Get<ClientContext &>();

	// Table identity, followed by the catalog lookup
	const auto catalog_name = deserializer.ReadProperty<string>(100, "catalog");
	const auto schema_name = deserializer.ReadProperty<string>(101, "schema");
	const auto table_name = deserializer.ReadProperty<string>(102, "table");
	auto &table_entry = Catalog::GetEntry<TableCatalogEntry>(context, catalog_name, schema_name, table_name);
	if (table_entry.type != CatalogType::TABLE_ENTRY) {
		throw SerializationException("Cant find table for %s.%s", schema_name, table_name);
	}

	// Name of the index we need to find again, and the bounding box that was being scanned
	const auto wanted_index = deserializer.ReadProperty<string>(103, "index_name");
	RTreeBounds query_box;
	deserializer.ReadObject(104, "bbox", [&](Deserializer &box_reader) {
		query_box.min.x = box_reader.ReadProperty<float>(10, "min_x");
		query_box.min.y = box_reader.ReadProperty<float>(11, "min_y");
		query_box.max.x = box_reader.ReadProperty<float>(20, "max_x");
		query_box.max.y = box_reader.ReadProperty<float>(21, "max_y");
	});

	auto &duck_table = table_entry.Cast<DuckTableEntry>();
	auto &table_info = *table_entry.GetStorage().GetDataTableInfo();

	// Scan the table's RTree indexes; the callback returns true once a match has been stored.
	unique_ptr<RTreeIndexScanBindData> bind_data;
	table_info.GetIndexes().BindAndScan<RTreeIndex>(context, table_info, [&](RTreeIndex &candidate) {
		if (candidate.GetIndexName() != wanted_index) {
			return false;
		}
		bind_data = make_uniq<RTreeIndexScanBindData>(duck_table, candidate, query_box);
		return true;
	});

	if (bind_data) {
		return std::move(bind_data);
	}
	throw SerializationException("Could not find index %s on table %s.%s", wanted_index, schema_name, table_name);
}



//-------------------------------------------------------------------------
// Get Function
//-------------------------------------------------------------------------
Expand All @@ -150,6 +212,8 @@ TableFunction RTreeIndexScanFunction::GetFunction() {
func.projection_pushdown = true;
func.filter_pushdown = false;
func.get_bind_info = RTreeIndexScanBindInfo;
func.serialize = RTreeScanSerialize;
func.deserialize = RTreeScanDeserialize;

return func;
}
Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_basic.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1 (geom GEOMETRY);

Expand Down
9 changes: 6 additions & 3 deletions test/sql/index/rtree_basic_points.test
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1 AS SELECT point::GEOMETRY as geom
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 1_000_000, 1337);
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 1000, max_y: 1000}::BOX_2D, 100_00, 1337);

query I
SELECT count(*) FROM t1 WHERE ST_Within(geom, ST_MakeEnvelope(450, 450, 650, 650));
----
352
390

statement ok
CREATE INDEX my_idx ON t1 USING RTREE (geom);
Expand All @@ -20,4 +23,4 @@ physical_plan <REGEX>:.*RTREE_INDEX_SCAN.*
query I
SELECT count(*) FROM t1 WHERE ST_Within(geom, ST_MakeEnvelope(450, 450, 650, 650));
----
352
390
3 changes: 3 additions & 0 deletions test/sql/index/rtree_block_reclaim.test_slow
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ require spatial

load __TEST_DIR__/rtree_reclaim_space.db

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE tbl AS SELECT row_number() over () as i, geom::GEOMETRY as geom FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 100_000, 1337) as pts(geom);

Expand Down
6 changes: 3 additions & 3 deletions test/sql/index/rtree_crud.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ require spatial

statement ok
CREATE TABLE t1 AS SELECT point::GEOMETRY as geom
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 100_000, 1337);
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 1000, max_y: 1000}::BOX_2D, 10000, 1337);

statement ok
INSERT INTO t1 (geom) VALUES ('POINT(1 1)');
Expand All @@ -23,12 +23,12 @@ statement ok
DROP INDEX my_idx;

statement ok
INSERT INTO t1 (geom) SELECT * FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 1000, 1337);
INSERT INTO t1 (geom) SELECT * FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 1000, max_y: 1000}::BOX_2D, 1000, 1337);

query I
SELECT count(*) FROM t1;
----
101000
11000

statement ok
CREATE INDEX my_idx ON t1 USING RTREE (geom);
Expand Down
4 changes: 2 additions & 2 deletions test/sql/index/rtree_crud_noreinsert.test
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ require spatial

statement ok
CREATE TABLE t1 AS SELECT point::GEOMETRY as geom
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 100_000, 1337);
FROM st_generatepoints({min_x: 0, min_y: 0, max_x: 10000, max_y: 10000}::BOX_2D, 10000, 1337);

query I
SELECT count(*) FROM t1;
----
100000
10000

statement ok
CREATE INDEX my_idx ON t1 USING RTREE (geom) WITH (min_node_capacity = 0);
Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_empty.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1(i INT, g GEOMETRY);

Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_projection.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1 (id int, geom GEOMETRY);

Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_pushdown.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1 (geom GEOMETRY, id INT);

Expand Down
3 changes: 3 additions & 0 deletions test/sql/index/rtree_single.test
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
require spatial

statement ok
PRAGMA enable_verification;

statement ok
CREATE TABLE t1(i INT, g GEOMETRY);

Expand Down
Loading