24807: doc: fix some typos and update outdated description r=justinj a=yaojingguo

Fix some typos and update an outdated description.

Release note: None

24826: engine: clean up EncodeKey function r=petermattis a=yaojingguo

1. Remove the duplicated declaration of EncodeKey in db.h.
2. Correct the inaccurate description of EncodeKey.
3. Remove the unnecessary 1-byte reserved space for string s.
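
As a rough illustration of the layout described by the corrected comment
(<key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>, as updated in
encoding.cc and encoding.h below), here is a minimal, self-contained C++
sketch. The helper names are hypothetical; this is not the libroach source.

```cpp
#include <cstdint>
#include <string>

namespace {

// Append v big-endian so that byte-wise comparison of the encoded
// timestamp matches numeric comparison.
void PutUint64BE(std::string& s, uint64_t v) {
  for (int shift = 56; shift >= 0; shift -= 8) {
    s.push_back(static_cast<char>((v >> shift) & 0xff));
  }
}

void PutUint32BE(std::string& s, uint32_t v) {
  for (int shift = 24; shift >= 0; shift -= 8) {
    s.push_back(static_cast<char>((v >> shift) & 0xff));
  }
}

}  // namespace

// Encode <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>.
std::string EncodeKeySketch(const std::string& key, int64_t wall_time,
                            int32_t logical) {
  const bool has_ts = wall_time != 0 || logical != 0;
  std::string s;
  // Reserve exactly key + sentinel + up to 12 timestamp bytes + 1 trailing
  // length byte; per item 3 above, no extra reserved byte is needed.
  s.reserve(key.size() + (has_ts ? 1 + 12 : 0) + 1);
  s.append(key);
  if (has_ts) {
    s.push_back('\0');  // sentinel separating the key from the timestamp
    PutUint64BE(s, static_cast<uint64_t>(wall_time));
    if (logical != 0) {
      PutUint32BE(s, static_cast<uint32_t>(logical));
    }
  }
  // The trailing byte records how many bytes follow the user key, letting a
  // decoder split the two parts without scanning.
  s.push_back(static_cast<char>(s.size() - key.size()));
  return s;
}
```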

Release note: None

24863: doc: fix broken link to Raft SSTable Sideloading RFC r=knz a=yaojingguo

Release note: None

24865: opt: fix and reenable stats test r=RaduBerinde a=RaduBerinde

The stats test was flaky because it depended on the asynchronous stat
cache invalidation happening by the time we expect to see the
statistic. Fixed by using a different table.

Fixes #24773.

Release note: None

Co-authored-by: Jingguo Yao <[email protected]>
Co-authored-by: Radu Berinde <[email protected]>
3 people committed Apr 17, 2018
5 parents e7d98d0 + a032d80 + b5e90fd + 8b6940f + dc9efa8 commit f75fa39
Showing 9 changed files with 58 additions and 56 deletions.
1 change: 1 addition & 0 deletions c-deps/libroach/ccl/db.cc
@@ -7,6 +7,7 @@
 // https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

 #include "../db.h"
+#include "../encoding.h"
 #include <iostream>
 #include <libroachccl.h>
 #include <memory>
5 changes: 0 additions & 5 deletions c-deps/libroach/db.h
@@ -61,11 +61,6 @@ inline std::string ToString(DBString s) { return std::string(s.data, s.len); }
 inline rocksdb::Slice ToSlice(DBSlice s) { return rocksdb::Slice(s.data, s.len); }
 inline rocksdb::Slice ToSlice(DBString s) { return rocksdb::Slice(s.data, s.len); }

-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
-// custom RocksDB comparator (DBComparator) is used to maintain the desired
-// ordering as these keys do not sort lexicographically correctly.
-std::string EncodeKey(DBKey k);
-
 // MVCCComputeStatsInternal returns the mvcc stats of the data in an iterator.
 // Stats are only computed for keys between the given range.
 MVCCStatsResult MVCCComputeStatsInternal(::rocksdb::Iterator* const iter_rep, DBKey start,
4 changes: 2 additions & 2 deletions c-deps/libroach/encoding.cc
@@ -158,7 +158,7 @@ std::string EncodeTimestamp(DBTimestamp ts) {
   return s;
 }

-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
+// MVCC keys are encoded as <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>. A
 // custom RocksDB comparator (DBComparator) is used to maintain the desired
 // ordering as these keys do not sort lexicographically correctly.
 std::string EncodeKey(const rocksdb::Slice& key, int64_t wall_time, int32_t logical) {
@@ -176,7 +176,7 @@ std::string EncodeKey(const rocksdb::Slice& key, int64_t wall_time, int32_t logi
   return s;
 }

-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
+// MVCC keys are encoded as <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>. A
 // custom RocksDB comparator (DBComparator) is used to maintain the desired
 // ordering as these keys do not sort lexicographically correctly.
 std::string EncodeKey(DBKey k) { return EncodeKey(ToSlice(k.key), k.wall_time, k.logical); }
4 changes: 2 additions & 2 deletions c-deps/libroach/encoding.h
@@ -56,12 +56,12 @@ const int kMVCCVersionTimestampSize = 12;
 void EncodeTimestamp(std::string& s, int64_t wall_time, int32_t logical);
 std::string EncodeTimestamp(DBTimestamp ts);

-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
+// MVCC keys are encoded as <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>. A
 // custom RocksDB comparator (DBComparator) is used to maintain the desired
 // ordering as these keys do not sort lexicographically correctly.
 std::string EncodeKey(const rocksdb::Slice& key, int64_t wall_time, int32_t logical);

-// MVCC keys are encoded as <key>[<wall_time>[<logical>]]<#timestamp-bytes>. A
+// MVCC keys are encoded as <key>\x00[<wall_time>[<logical>]]<#timestamp-bytes>. A
 // custom RocksDB comparator (DBComparator) is used to maintain the desired
 // ordering as these keys do not sort lexicographically correctly.
 std::string EncodeKey(DBKey k);
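
The repeated comment notes that these keys do not sort lexicographically in
the desired order, which is why the custom DBComparator exists. As an
illustration only (a sketch, not the actual comparator), the following splits
an encoded key on the trailing length byte and orders entries by user key
ascending, then timestamp descending; it assumes the convention that a
timestamp-less key sorts before the versioned keys for the same user key.

```cpp
#include <string>
#include <utility>

namespace {

// Split an encoded MVCC key into (user key, timestamp suffix) using the
// trailing #timestamp-bytes byte appended by EncodeKey.
std::pair<std::string, std::string> SplitKeySketch(const std::string& enc) {
  const size_t ts_len = static_cast<unsigned char>(enc.back());
  const size_t key_len = enc.size() - 1 - ts_len;
  return {enc.substr(0, key_len), enc.substr(key_len, ts_len)};
}

}  // namespace

// Returns <0, 0, or >0 like memcmp. Plain byte-wise comparison of the
// encoded keys would order timestamps ascending; MVCC iteration wants the
// newest version first, hence the reversed timestamp comparison.
int CompareMVCCSketch(const std::string& a, const std::string& b) {
  const auto sa = SplitKeySketch(a);
  const auto sb = SplitKeySketch(b);
  if (int c = sa.first.compare(sb.first)) return c;  // user key ascending
  if (sa.second.empty() || sb.second.empty()) {
    // Assumed convention: the bare (timestamp-less) key sorts first.
    return static_cast<int>(sa.second.size()) -
           static_cast<int>(sb.second.size());
  }
  return sb.second.compare(sa.second);  // timestamp descending
}
```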
2 changes: 1 addition & 1 deletion docs/RFCS/20160720_backup_restore.md
@@ -155,7 +155,7 @@ intermediate state.

 An alternate, much faster method that doesn't store the data bytes in the raft
 log, and thus avoids the write amplification, is described in the [Raft SSTable
-Sideloading RFC](raft_sstable_sideloading.md).
+Sideloading RFC](20170601_raft_sstable_sideloading.md).

 The restored table's NULL, DEFAULT, and PRIMARY KEY constraints do not need to
 be checked. NOT NULL and CHECK are verified for each row as it is inserted. The
2 changes: 1 addition & 1 deletion docs/tech-notes/life_of_a_query.md
@@ -458,7 +458,7 @@ Now let's see how these `planNode`s run:
 from SQL92/99. Another interesting fact is that, if we're sorting by a
 non-trivial expression (e.g. `SELECT a, b ... ORDER BY a + b`), we
 need the `a + b` values (for every row) to be produced by a
-lower-level node. This is achieved through a patter that's also
+lower-level node. This is achieved through a pattern that's also
 present in other node: the lower node capable of evaluating
 expressions and rendering their results is the `renderNode`; the
 `sortNode` constructor checks if the expressions it needs are already
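
The hunk above describes a pattern worth spelling out: a sortNode does not
evaluate ORDER BY expressions itself; it asks the renderNode below it to
produce them. CockroachDB implements this in Go; the following C++ sketch is
purely illustrative, with hypothetical names.

```cpp
#include <string>
#include <vector>

// Illustrative stand-in for a render node: it knows which expressions it
// already produces and can be asked to produce more.
struct RenderNodeSketch {
  std::vector<std::string> renders;  // e.g. {"a", "b"}

  // Return the output column for expr, adding a new render target if the
  // expression is not already produced.
  int AddOrReuseRender(const std::string& expr) {
    for (size_t i = 0; i < renders.size(); ++i) {
      if (renders[i] == expr) return static_cast<int>(i);
    }
    renders.push_back(expr);  // e.g. "a + b" for ORDER BY a + b
    return static_cast<int>(renders.size()) - 1;
  }
};

// The sort node records only column indices; its constructor mirrors the
// check described for the sortNode constructor.
struct SortNodeSketch {
  RenderNodeSketch* src;
  std::vector<int> ordering;

  SortNodeSketch(RenderNodeSketch* source,
                 const std::vector<std::string>& order_by)
      : src(source) {
    for (const auto& expr : order_by) {
      ordering.push_back(src->AddOrReuseRender(expr));
    }
  }
};
```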
2 changes: 1 addition & 1 deletion docs/tech-notes/sql-principles.md
@@ -134,7 +134,7 @@ Some examples in the second group:
 | The overall latency of a query is solely decided by the number of rows processed in memory and the number of page accesses on disk. | Network effects and implicit retries dominate. |
 | The costing estimates use a fixed ratio between disk and CPU performance. | Each node in a cluster can have a different CPU/disk performance ratio. Network performance evolves over time. |
 | Each area of contiguous storage contains data from at most one table or one index. Storage locality maps 1-to-1 to query locality. | [Interleaved tables.](../RFCS/20160624_sql_interleaved_tables.md) |
-| A logical range of values, in a SQL query, corresponds to a contiguous area of storage when the columns are indexed. | Partitioned tables. (soon) |
+| A logical range of values, in a SQL query, corresponds to a contiguous area of storage when the columns are indexed. | [Partitioned tables.](../RFCS/20170921_sql_partitioning.md) |

 For this second group, original research will be needed to construct,
 from the ground up, CockroachDB-specific replacements to the best
2 changes: 1 addition & 1 deletion docs/tech-notes/sql.md
@@ -64,7 +64,7 @@ For example, if the architecture calls out a thing called e.g. "query
 runner", which takes as input a logical query plan (a data structure)
 and outputs result rows (another data structure), you'd usually expect
 a thing in the source code called "query runner" that looks like a
-class whose instances would carry the execution's internal state, a
+class whose instances would carry the execution's internal state
 providing some methods that take a logical plan as input, and returning
 result rows as results.

92 changes: 49 additions & 43 deletions pkg/sql/opt/exec/execbuilder/testdata/stats
@@ -1,49 +1,55 @@
-# # Tests that verify we retrieve the stats correctly.
+# Tests that verify we retrieve the stats correctly.

-# exec-raw
-# CREATE DATABASE t
-# ----
+exec-raw
+CREATE DATABASE t
+----

-# exec-raw
-# CREATE TABLE t.a (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u));
-# INSERT INTO t.a VALUES (1, 1), (1, 2), (1, 3), (1, 4), (2, 4), (2, 5), (2, 6), (2, 7)
-# ----
+exec-raw
+CREATE TABLE t.a (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u));
+----

-# build
-# SELECT * FROM t.a
-# ----
-# project
-#  ├── columns: u:1(int) v:2(int)
-#  ├── stats: [rows=1000]
-#  ├── cost: 1000.00
-#  ├── scan a
-#  │    ├── columns: a.u:1(int) a.v:2(int) a.rowid:3(int!null)
-#  │    ├── stats: [rows=1000]
-#  │    ├── cost: 1000.00
-#  │    └── keys: (3)
-#  └── projections [outer=(1,2)]
-#       ├── variable: a.u [type=int, outer=(1)]
-#       └── variable: a.v [type=int, outer=(2)]
+build
+SELECT * FROM t.a
+----
+project
+ ├── columns: u:1(int) v:2(int)
+ ├── stats: [rows=1000]
+ ├── cost: 1000.00
+ ├── scan a
+ │    ├── columns: a.u:1(int) a.v:2(int) a.rowid:3(int!null)
+ │    ├── stats: [rows=1000]
+ │    ├── cost: 1000.00
+ │    └── keys: (3)
+ └── projections [outer=(1,2)]
+      ├── variable: a.u [type=int, outer=(1)]
+      └── variable: a.v [type=int, outer=(2)]

-# exec-raw
-# CREATE STATISTICS u ON u FROM t.a
-# ----
+# Create a new table to avoid depending on the asynchronous stat cache
+# invalidation.
+exec-raw
+CREATE TABLE t.b (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u));
+INSERT INTO t.b VALUES (1, 1), (1, 2), (1, 3), (1, 4), (2, 4), (2, 5), (2, 6), (2, 7)
+----

-# build
-# SELECT * FROM t.a
-# ----
-# project
-#  ├── columns: u:1(int) v:2(int)
-#  ├── stats: [rows=8]
-#  ├── cost: 8.00
-#  ├── scan a
-#  │    ├── columns: a.u:1(int) a.v:2(int) a.rowid:3(int!null)
-#  │    ├── stats: [rows=8]
-#  │    ├── cost: 8.00
-#  │    └── keys: (3)
-#  └── projections [outer=(1,2)]
-#       ├── variable: a.u [type=int, outer=(1)]
-#       └── variable: a.v [type=int, outer=(2)]
+exec-raw
+CREATE STATISTICS u ON u FROM t.b
+----

-# # TODO(radu): once we use cardinality, verify we choose the index with the
-# # smaller cardinality (for a WHERE condition on both columns).
+build
+SELECT * FROM t.b
+----
+project
+ ├── columns: u:1(int) v:2(int)
+ ├── stats: [rows=8, distinct(1)=2]
+ ├── cost: 8.00
+ ├── scan b
+ │    ├── columns: b.u:1(int) b.v:2(int) b.rowid:3(int!null)
+ │    ├── stats: [rows=8, distinct(1)=2]
+ │    ├── cost: 8.00
+ │    └── keys: (3)
+ └── projections [outer=(1,2)]
+      ├── variable: b.u [type=int, outer=(1)]
+      └── variable: b.v [type=int, outer=(2)]
+
+# TODO(radu): once we use cardinality, verify we choose the index with the
+# smaller cardinality (for a WHERE condition on both columns).
