diff --git a/c-deps/libroach/ccl/db.cc b/c-deps/libroach/ccl/db.cc
index 3bf0b9c1c749..e6b0801cbf28 100644
--- a/c-deps/libroach/ccl/db.cc
+++ b/c-deps/libroach/ccl/db.cc
@@ -7,6 +7,7 @@
 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
 
 #include "../db.h"
+#include "../encoding.h"
 #include
 #include
 #include
diff --git a/c-deps/libroach/db.h b/c-deps/libroach/db.h
index 452e1b3e2803..33d48043e43e 100644
--- a/c-deps/libroach/db.h
+++ b/c-deps/libroach/db.h
@@ -61,11 +61,6 @@ inline std::string ToString(DBString s) { return std::string(s.data, s.len); }
 inline rocksdb::Slice ToSlice(DBSlice s) { return rocksdb::Slice(s.data, s.len); }
 inline rocksdb::Slice ToSlice(DBString s) { return rocksdb::Slice(s.data, s.len); }
 
-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
-// custom RocksDB comparator (DBComparator) is used to maintain the desired
-// ordering as these keys do not sort lexicographically correctly.
-std::string EncodeKey(DBKey k);
-
 // MVCCComputeStatsInternal returns the mvcc stats of the data in an iterator.
 // Stats are only computed for keys between the given range.
 MVCCStatsResult MVCCComputeStatsInternal(::rocksdb::Iterator* const iter_rep, DBKey start,
diff --git a/c-deps/libroach/encoding.cc b/c-deps/libroach/encoding.cc
index b6623b72dca7..704f4f60c71b 100644
--- a/c-deps/libroach/encoding.cc
+++ b/c-deps/libroach/encoding.cc
@@ -158,7 +158,7 @@ std::string EncodeTimestamp(DBTimestamp ts) {
   return s;
 }
 
-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
+// MVCC keys are encoded as <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>. A
 // custom RocksDB comparator (DBComparator) is used to maintain the desired
 // ordering as these keys do not sort lexicographically correctly.
 std::string EncodeKey(const rocksdb::Slice& key, int64_t wall_time, int32_t logical) {
@@ -176,7 +176,7 @@ std::string EncodeKey(const rocksdb::Slice& key, int64_t wall_time, int32_t logi
   return s;
 }
 
-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
+// MVCC keys are encoded as <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>. A
 // custom RocksDB comparator (DBComparator) is used to maintain the desired
 // ordering as these keys do not sort lexicographically correctly.
 std::string EncodeKey(DBKey k) { return EncodeKey(ToSlice(k.key), k.wall_time, k.logical); }
diff --git a/c-deps/libroach/encoding.h b/c-deps/libroach/encoding.h
index 2fc42f4e07f5..b0ab0eedb6d9 100644
--- a/c-deps/libroach/encoding.h
+++ b/c-deps/libroach/encoding.h
@@ -56,12 +56,12 @@ const int kMVCCVersionTimestampSize = 12;
 void EncodeTimestamp(std::string& s, int64_t wall_time, int32_t logical);
 std::string EncodeTimestamp(DBTimestamp ts);
 
-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
+// MVCC keys are encoded as <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>. A
 // custom RocksDB comparator (DBComparator) is used to maintain the desired
 // ordering as these keys do not sort lexicographically correctly.
 std::string EncodeKey(const rocksdb::Slice& key, int64_t wall_time, int32_t logical);
 
-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
+// MVCC keys are encoded as <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>. A
 // custom RocksDB comparator (DBComparator) is used to maintain the desired
 // ordering as these keys do not sort lexicographically correctly.
 std::string EncodeKey(DBKey k);
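
The corrected comments name the layout but the hunks above don't show the encoder itself. Here is a minimal Go sketch of the `<key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>` layout, consistent with `kMVCCVersionTimestampSize = 12` (8-byte wall time + 4-byte logical) visible in the encoding.h context. The function name and the exact suffix-length byte values are illustrative assumptions, not the production C++ `EncodeKey`:

```go
package main

import (
	"encoding/binary"
	"fmt"
)

// encodeMVCCKey sketches the <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>
// layout: the user key, a NUL sentinel, a big-endian 8-byte wall time, an
// optional big-endian 4-byte logical tick, and a trailing byte recording how
// many bytes the sentinel-plus-timestamp suffix occupies.
func encodeMVCCKey(key []byte, wallTime int64, logical int32) []byte {
	buf := append([]byte(nil), key...)
	if wallTime != 0 || logical != 0 {
		buf = append(buf, 0) // NUL sentinel separating key from timestamp
		var wall [8]byte
		binary.BigEndian.PutUint64(wall[:], uint64(wallTime))
		buf = append(buf, wall[:]...)
		if logical != 0 {
			var lg [4]byte
			binary.BigEndian.PutUint32(lg[:], uint32(logical))
			buf = append(buf, lg[:]...)
		}
	}
	// Suffix length: 0 for an unversioned key, 9 (1+8) or 13 (1+8+4) otherwise.
	buf = append(buf, byte(len(buf)-len(key)))
	return buf
}

func main() {
	fmt.Printf("%q\n", encodeMVCCKey([]byte("roach"), 5, 1))
	// For a fixed key, newer timestamps must sort first, which is why plain
	// lexicographic comparison is wrong and a custom comparator is needed.
}
```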
diff --git a/docs/RFCS/20160720_backup_restore.md b/docs/RFCS/20160720_backup_restore.md
index 56106f1914bf..8f535be15d00 100644
--- a/docs/RFCS/20160720_backup_restore.md
+++ b/docs/RFCS/20160720_backup_restore.md
@@ -155,7 +155,7 @@ intermediate state.
 
 An alternate, much faster method that doesn't store the data bytes in the raft
 log, and thus avoids the write amplification, is described in the [Raft SSTable
-Sideloading RFC](raft_sstable_sideloading.md).
+Sideloading RFC](20170601_raft_sstable_sideloading.md).
 
 The restored table's NULL, DEFAULT, and PRIMARY KEY constraints do not need to
 be checked. NOT NULL and CHECK are verified for each row as it is inserted. The
diff --git a/docs/tech-notes/life_of_a_query.md b/docs/tech-notes/life_of_a_query.md
index 9f16e0bdb33b..f5b2488863a2 100644
--- a/docs/tech-notes/life_of_a_query.md
+++ b/docs/tech-notes/life_of_a_query.md
@@ -458,7 +458,7 @@ Now let's see how these `planNode`s run:
    from SQL92/99. Another interesting fact is that, if we're
    sorting by a non-trivial expression (e.g. `SELECT a, b ... ORDER BY
    a + b`), we need the `a + b` values (for every row) to be produced by a
-   lower-level node. This is achieved through a patter that's also
+   lower-level node. This is achieved through a pattern that's also
    present in other node: the lower node capable of evaluating
    expressions and rendering their results is the `renderNode`; the
    `sortNode` constructor checks if the expressions it needs are already
diff --git a/docs/tech-notes/sql-principles.md b/docs/tech-notes/sql-principles.md
index 7ce5f890152d..a6817faa2a57 100644
--- a/docs/tech-notes/sql-principles.md
+++ b/docs/tech-notes/sql-principles.md
@@ -134,7 +134,7 @@ Some examples in the second group:
 | The overall latency of a query is solely decided by the number of rows processed in memory and the number of page accesses on disk. | Network effects and implicit retries dominate. |
 | The costing estimates use a fixed ratio between disk and CPU performance. | Each node in a cluster can have a different CPU/disk performance ratio. Network performance evolves over time. |
 | Each area of contiguous storage contains data from at most one table or one index. Storage locality maps 1-to-1 to query locality. | [Interleaved tables.](../RFCS/20160624_sql_interleaved_tables.md) |
-| A logical range of values, in a SQL query, corresponds to a contiguous area of storage when the columns are indexed. | Partitioned tables. (soon) |
+| A logical range of values, in a SQL query, corresponds to a contiguous area of storage when the columns are indexed. | [Partitioned tables.](../RFCS/20170921_sql_partitioning.md) |
 
 For this second group, original research will be needed to
 construct, from the ground up, CockroachDB-specific replacements to the best
diff --git a/docs/tech-notes/sql.md b/docs/tech-notes/sql.md
index 038427ddbee9..30599e7a275c 100644
--- a/docs/tech-notes/sql.md
+++ b/docs/tech-notes/sql.md
@@ -64,7 +64,7 @@ For example, if the architecture calls out a thing called
 e.g. "query runner", which takes as input a logical query plan (a
 data structure) and outputs result rows (another data structure),
 you'd usually expect a thing in the source code called "query
 runner" that looks like a
-class whose instances would carry the execution's internal state, a
+class whose instances would carry the execution's internal state
 providing some methods that take a logical plan as input, and
 returning result rows as results.
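
To make the render-below-sort pattern in the life_of_a_query.md hunk concrete, here is a hedged Go sketch. All names are illustrative and the real `planNode` interfaces differ: a lower "render" step materializes `a + b` as an extra column once per row, and the sort step orders by that column and then hides it from the result.

```go
package main

import (
	"fmt"
	"sort"
)

type row []int // [a, b], plus any rendered helper columns

// render plays the renderNode's role: it evaluates ORDER BY a + b for every
// row and appends the result as a hidden third column.
func render(in []row) []row {
	out := make([]row, len(in))
	for i, r := range in {
		out[i] = append(append(row{}, r...), r[0]+r[1])
	}
	return out
}

// sortRows plays the sortNode's role: it orders by the rendered column and
// then drops it, so the client only ever sees a and b.
func sortRows(in []row) []row {
	sort.Slice(in, func(i, j int) bool { return in[i][2] < in[j][2] })
	for i := range in {
		in[i] = in[i][:2]
	}
	return in
}

func main() {
	rows := []row{{3, 1}, {1, 1}, {2, 4}}
	fmt.Println(sortRows(render(rows))) // [[1 1] [3 1] [2 4]]
}
```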
diff --git a/pkg/sql/opt/exec/execbuilder/testdata/stats b/pkg/sql/opt/exec/execbuilder/testdata/stats
index 1c91b405da17..f4ca0abe7636 100644
--- a/pkg/sql/opt/exec/execbuilder/testdata/stats
+++ b/pkg/sql/opt/exec/execbuilder/testdata/stats
@@ -1,49 +1,55 @@
-# # Tests that verify we retrieve the stats correctly.
+# Tests that verify we retrieve the stats correctly.
 
-# exec-raw
-# CREATE DATABASE t
-# ----
+exec-raw
+CREATE DATABASE t
+----
 
-# exec-raw
-# CREATE TABLE t.a (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u));
-# INSERT INTO t.a VALUES (1, 1), (1, 2), (1, 3), (1, 4), (2, 4), (2, 5), (2, 6), (2, 7)
-# ----
+exec-raw
+CREATE TABLE t.a (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u));
+----
 
-# build
-# SELECT * FROM t.a
-# ----
-# project
-#  ├── columns: u:1(int) v:2(int)
-#  ├── stats: [rows=1000]
-#  ├── cost: 1000.00
-#  ├── scan a
-#  │    ├── columns: a.u:1(int) a.v:2(int) a.rowid:3(int!null)
-#  │    ├── stats: [rows=1000]
-#  │    ├── cost: 1000.00
-#  │    └── keys: (3)
-#  └── projections [outer=(1,2)]
-#       ├── variable: a.u [type=int, outer=(1)]
-#       └── variable: a.v [type=int, outer=(2)]
+build
+SELECT * FROM t.a
+----
+project
+ ├── columns: u:1(int) v:2(int)
+ ├── stats: [rows=1000]
+ ├── cost: 1000.00
+ ├── scan a
+ │    ├── columns: a.u:1(int) a.v:2(int) a.rowid:3(int!null)
+ │    ├── stats: [rows=1000]
+ │    ├── cost: 1000.00
+ │    └── keys: (3)
+ └── projections [outer=(1,2)]
+      ├── variable: a.u [type=int, outer=(1)]
+      └── variable: a.v [type=int, outer=(2)]
 
-# exec-raw
-# CREATE STATISTICS u ON u FROM t.a
-# ----
+# Create a new table to avoid depending on the asynchronous stat cache
+# invalidation.
+exec-raw
+CREATE TABLE t.b (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u));
+INSERT INTO t.b VALUES (1, 1), (1, 2), (1, 3), (1, 4), (2, 4), (2, 5), (2, 6), (2, 7)
+----
 
-# build
-# SELECT * FROM t.a
-# ----
-# project
-#  ├── columns: u:1(int) v:2(int)
-#  ├── stats: [rows=8]
-#  ├── cost: 8.00
-#  ├── scan a
-#  │    ├── columns: a.u:1(int) a.v:2(int) a.rowid:3(int!null)
-#  │    ├── stats: [rows=8]
-#  │    ├── cost: 8.00
-#  │    └── keys: (3)
-#  └── projections [outer=(1,2)]
-#       ├── variable: a.u [type=int, outer=(1)]
-#       └── variable: a.v [type=int, outer=(2)]
+exec-raw
+CREATE STATISTICS u ON u FROM t.b
+----
 
-# # TODO(radu): once we use cardinality, verify we choose the index with the
-# # smaller cardinality (for a WHERE condition on both columns).
+build
+SELECT * FROM t.b
+----
+project
+ ├── columns: u:1(int) v:2(int)
+ ├── stats: [rows=8, distinct(1)=2]
+ ├── cost: 8.00
+ ├── scan b
+ │    ├── columns: b.u:1(int) b.v:2(int) b.rowid:3(int!null)
+ │    ├── stats: [rows=8, distinct(1)=2]
+ │    ├── cost: 8.00
+ │    └── keys: (3)
+ └── projections [outer=(1,2)]
+      ├── variable: b.u [type=int, outer=(1)]
+      └── variable: b.v [type=int, outer=(2)]
+
+# TODO(radu): once we use cardinality, verify we choose the index with the
+# smaller cardinality (for a WHERE condition on both columns).
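
The new `distinct(1)=2` annotation is what would make the cardinality-based index choice in the TODO above possible. As a hedged illustration of the textbook estimate such a statistic enables — not the optimizer's actual costing code — an equality predicate on `u` is expected to match about rows/distinct rows:

```go
package main

import "fmt"

// estimateEqRows applies the classic uniform-distribution assumption: a
// predicate `col = <const>` on a column with `distinct` values selects
// roughly rowCount/distinct rows.
func estimateEqRows(rowCount, distinct float64) float64 {
	if distinct == 0 {
		return 0
	}
	return rowCount / distinct
}

func main() {
	// With the stats above (rows=8, distinct(1)=2 on t.b's column u):
	fmt.Println(estimateEqRows(8, 2)) // 4 expected rows for u = <const>
}
```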