Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
89524: backupccl: use klauspost gzip library r=benbardin a=benbardin

Expect some degree of speedup in manifest reading/writing, as in cockroachdb#88632.

Release note: None

89903: sql: make NOTHING and INDEX non-reserved keywords r=rafiss a=knz

Fixes cockroachdb#69420.

Release note (bug fix): It is now possible again to create tables,
views, columns, etc with the name `nothing` (e.g. `CREATE TABLE
nothing...`) without having to quote the name, like in
PostgreSQL. This bug was introduced in CockroachDB v2.0.

Release note (bug fix): It is now possible again to create tables,
views, columns, etc with the name `index` (e.g. `CREATE TABLE
index...`) without having to quote the name, like in
PostgreSQL. This bug was introduced in CockroachDB v1.0.


90000: workload/kv: fix --splits  r=ajwerner a=kvoli

Previously the `--splits` flag would generate splits assuming the kv
workload would always access keys in `[MinInt64, MaxInt64)`.
However, it was only true for the default uniform hash key generator.
While it was disabled for `--sequential` and `--zipfian` generates keys
in `[0, MaxInt64)`.

The previous `--split` flag also resulted in an integer overflow when
`--cycle-length` was greater than 0, only generating splits in
`[MinInt64, 0)`. This patch resolves the overflow issue.

This patch enables splits for `--sequential` and adjusts the `--splits`
logic to equally partition the keyspace, according to the access
distribution. The valid range for each is shown below.

uniform    `[MinInt64, MaxInt64)`
sequential `[0, --cycle-length)`
zipfian    `[0, MaxInt64)`

resolves cockroachdb#82906

Release note (cli change): In the kv workload enable `--splits` with the
`--sequential` flag and adjust splitting to uniformly partition the
keyspace.

Co-authored-by: Ben Bardin <[email protected]>
Co-authored-by: Raphael 'kena' Poss <[email protected]>
Co-authored-by: Austen McClernon <[email protected]>
  • Loading branch information
4 people committed Oct 17, 2022
4 parents dccfc47 + 14fa834 + d7e34e6 + 5a11c4a commit 1c02afc
Show file tree
Hide file tree
Showing 16 changed files with 485 additions and 71 deletions.
26 changes: 17 additions & 9 deletions docs/generated/sql/bnf/index_def.bnf
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
index_def ::=
'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'COVERING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'STORING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'INCLUDE' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'COVERING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'STORING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'INCLUDE' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'COVERING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'STORING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'INCLUDE' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' 'COVERING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' 'STORING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' 'INCLUDE' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'COVERING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'STORING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'INCLUDE' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' name '(' index_elem ( ( ',' index_elem ) )* ')' 'COVERING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' name '(' index_elem ( ( ',' index_elem ) )* ')' 'STORING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' name '(' index_elem ( ( ',' index_elem ) )* ')' 'INCLUDE' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' name '(' index_elem ( ( ',' index_elem ) )* ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'UNIQUE' 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'COVERING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'UNIQUE' 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'STORING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'UNIQUE' 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'USING' 'HASH' 'INCLUDE' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
Expand All @@ -15,5 +23,5 @@ index_def ::=
| 'UNIQUE' 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'STORING' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'UNIQUE' 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' 'INCLUDE' '(' name_list ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'UNIQUE' 'INDEX' opt_index_name '(' index_elem ( ( ',' index_elem ) )* ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INVERTED' 'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INVERTED' 'INDEX' name '(' index_elem ( ( ',' index_elem ) )* ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INVERTED' 'INDEX' '(' index_elem ( ( ',' index_elem ) )* ')' ( 'PARTITION' ( 'ALL' | ) 'BY' partition_by_inner | ) opt_with_storage_parameter_list opt_where_clause opt_index_visible
11 changes: 9 additions & 2 deletions docs/generated/sql/bnf/stmt_block.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -1127,6 +1127,7 @@ unreserved_keyword ::=
| 'INCREMENT'
| 'INCREMENTAL'
| 'INCREMENTAL_LOCATION'
| 'INDEX'
| 'INDEXES'
| 'INHERITS'
| 'INJECT'
Expand Down Expand Up @@ -1194,6 +1195,7 @@ unreserved_keyword ::=
| 'NEXT'
| 'NO'
| 'NORMAL'
| 'NOTHING'
| 'NO_INDEX_JOIN'
| 'NO_ZIGZAG_JOIN'
| 'NO_FULL_SCAN'
Expand Down Expand Up @@ -2615,6 +2617,8 @@ type_func_name_crdb_extra_keyword ::=

cockroachdb_extra_reserved_keyword ::=
'INDEX'
| 'INDEX'
| 'INDEX'
| 'NOTHING'

type_func_name_keyword ::=
Expand Down Expand Up @@ -3338,6 +3342,7 @@ interval_qualifier ::=
func_name ::=
type_function_name
| prefixed_column_path
| 'INDEX'

single_sort_clause ::=
'ORDER' 'BY' sortby
Expand Down Expand Up @@ -3729,9 +3734,11 @@ storage_parameter_key ::=
| 'SCONST'

index_def ::=
'INDEX' opt_index_name '(' index_params ')' opt_hash_sharded opt_storing opt_partition_by_index opt_with_storage_parameter_list opt_where_clause opt_index_visible
'INDEX' '(' index_params ')' opt_hash_sharded opt_storing opt_partition_by_index opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INDEX' name '(' index_params ')' opt_hash_sharded opt_storing opt_partition_by_index opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'UNIQUE' 'INDEX' opt_index_name '(' index_params ')' opt_hash_sharded opt_storing opt_partition_by_index opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INVERTED' 'INDEX' opt_name '(' index_params ')' opt_partition_by_index opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INVERTED' 'INDEX' '(' index_params ')' opt_partition_by_index opt_with_storage_parameter_list opt_where_clause opt_index_visible
| 'INVERTED' 'INDEX' name '(' index_params ')' opt_partition_by_index opt_with_storage_parameter_list opt_where_clause opt_index_visible

like_table_option_list ::=
( ) ( ( 'INCLUDING' like_table_option | 'EXCLUDING' like_table_option ) )*
Expand Down
2 changes: 2 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,7 @@ ALL_TESTS = [
"//pkg/workload/cli:cli_test",
"//pkg/workload/faker:faker_test",
"//pkg/workload/histogram:histogram_test",
"//pkg/workload/kv:kv_test",
"//pkg/workload/movr:movr_test",
"//pkg/workload/rand:rand_test",
"//pkg/workload/tpcc:tpcc_test",
Expand Down Expand Up @@ -2114,6 +2115,7 @@ GO_TARGETS = [
"//pkg/workload/indexes:indexes",
"//pkg/workload/jsonload:jsonload",
"//pkg/workload/kv:kv",
"//pkg/workload/kv:kv_test",
"//pkg/workload/ledger:ledger",
"//pkg/workload/movr:movr",
"//pkg/workload/movr:movr_test",
Expand Down
1 change: 1 addition & 0 deletions pkg/ccl/backupccl/backupinfo/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ go_library(
"//pkg/util/timeutil",
"//pkg/util/tracing",
"@com_github_cockroachdb_errors//:errors",
"@com_github_klauspost_compress//gzip",
],
)

Expand Down
7 changes: 5 additions & 2 deletions pkg/ccl/backupccl/backupinfo/manifest_handling.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ package backupinfo

import (
"bytes"
"compress/gzip"
"context"
"crypto/sha256"
"encoding/hex"
Expand Down Expand Up @@ -55,6 +54,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
gzip "github.com/klauspost/compress/gzip"
)

// Files that may appear in a backup directory.
Expand Down Expand Up @@ -189,7 +189,10 @@ func DecompressData(ctx context.Context, mem *mon.BoundAccount, descBytes []byte
if err != nil {
return nil, err
}
defer r.Close()
defer func() {
// Swallow any errors, this is only a read operation.
_ = r.Close()
}()
return mon.ReadAll(ctx, ioctx.ReaderAdapter(r), mem)
}

Expand Down
4 changes: 4 additions & 0 deletions pkg/cmd/docgen/diagrams.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,12 @@ func runParse(
return nil, errors.Wrap(err, "inline")
}
b, err := g.ExtractProduction(topStmt, descend, nosplit, match, exclude)
b = bytes.Replace(b, []byte("NOTHING_AFTER_RETURNING"), []byte("NOTHING"), -1)
b = bytes.Replace(b, []byte("'IDENT'"), []byte("'identifier'"), -1)
b = bytes.Replace(b, []byte("_LA"), []byte(""), -1)
b = bytes.Replace(b, []byte("INDEX_BEFORE_PAREN"), []byte("INDEX"), -1)
b = bytes.Replace(b, []byte("INDEX_BEFORE_NAME_THEN_PAREN"), []byte("INDEX"), -1)
b = bytes.Replace(b, []byte("INDEX_AFTER_ORDER_BY_BEFORE_AT"), []byte("INDEX"), -1)
return b, err
}

Expand Down
125 changes: 125 additions & 0 deletions pkg/sql/parser/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,131 @@ func (l *lexer) Lex(lval *sqlSymType) int {
*lval = l.tokens[l.lastPos]

switch lval.id {
case NOTHING:
// Introducing the "RETURNING NOTHING" syntax in CockroachDB
// was a terrible idea, given that it is not even used any more!
// We should really deprecate it and remove this special case.
if l.lastPos > 0 && l.tokens[l.lastPos-1].id == RETURNING {
lval.id = NOTHING_AFTER_RETURNING
}
case INDEX:
// The following complex logic is a consternation, really.
//
// It flows from a profoundly mistaken decision to allow the INDEX
// keyword inside the column definition list of CREATE, a place
// where PostgreSQL did not allow it, for a very good reason:
// applications legitimately want to name columns with the name
// "index".
//
// After this mistaken decision was first made, the INDEX keyword
// was also allowed in CockroachDB in another place where it is
// partially ambiguous with other identifiers: ORDER BY
// (`ORDER BY INDEX foo@bar`, ambiguous with `ORDER BY index`).
//
// Sadly it took a very long time before we realized this mistake,
// and by that time these uses of INDEX have become legitimate
// CockroachDB features.
//
// We are thus left with the need to disambiguate between:
//
// CREATE TABLE t(index a) -- column name "index", column type "a"
// CREATE TABLE t(index (a)) -- keyword INDEX, column name "a"
// CREATE TABLE t(index a (b)) -- keyword INDEX, index name "a", column name "b"
//
// Thankfully, a coldef for a column named "index" and an index
// specification differ unambiguously, *given sufficient
// lookaheaed*: an index specification always has an open '('
// after INDEX, with or without an identifier in-between. A column
// definition never has this.
//
// Likewise, between:
//
// ORDER BY index
// ORDER BY index a@idx
// ORDER BY index a.b@idx
// ORDER BY index a.b.c@idx
//
// We can unambiguously distinguish by the presence of the '@' sign
// with a maximum of 6 token lookahead.
//
var pprevID, prevID int32
if l.lastPos > 0 {
prevID = l.tokens[l.lastPos-1].id
}
if l.lastPos > 1 {
pprevID = l.tokens[l.lastPos-2].id
}
var nextID, secondID int32
if l.lastPos+1 < len(l.tokens) {
nextID = l.tokens[l.lastPos+1].id
}
if l.lastPos+2 < len(l.tokens) {
secondID = l.tokens[l.lastPos+2].id
}
afterCommaOrParen := prevID == ',' || prevID == '('
afterCommaOrOPTIONS := prevID == ',' || prevID == OPTIONS
afterCommaOrParenThenINVERTED := prevID == INVERTED && (pprevID == ',' || pprevID == '(')
followedByParen := nextID == '('
followedByNonPunctThenParen := nextID > 255 /* non-punctuation */ && secondID == '('
if //
// CREATE ... (INDEX (
// CREATE ... (x INT, y INT, INDEX (
(afterCommaOrParen && followedByParen) ||
// SCRUB ... WITH OPTIONS INDEX (...
// SCRUB ... WITH OPTIONS a, INDEX (...
(afterCommaOrOPTIONS && followedByParen) ||
// CREATE ... (INVERTED INDEX (
// CREATE ... (x INT, y INT, INVERTED INDEX (
(afterCommaOrParenThenINVERTED && followedByParen) {
lval.id = INDEX_BEFORE_PAREN
break
}
if //
// CREATE ... (INDEX abc (
// CREATE ... (x INT, y INT, INDEX abc (
(afterCommaOrParen && followedByNonPunctThenParen) ||
// CREATE ... (INVERTED INDEX abc (
// CREATE ... (x INT, y INT, INVERTED INDEX abc (
(afterCommaOrParenThenINVERTED && followedByNonPunctThenParen) {
lval.id = INDEX_BEFORE_NAME_THEN_PAREN
break
}
// The rules above all require that the INDEX keyword be
// followed ultimately by an open parenthesis, with no '@'
// in-between. The rule below is strictly exclusive with this
// situation.
afterCommaOrOrderBy := prevID == ',' || (prevID == BY && pprevID == ORDER)
if afterCommaOrOrderBy {
// SORT BY INDEX <objname> @
// SORT BY a, b, INDEX <objname> @
atSignAfterObjectName := false
// An object name has one of the following forms:
// name
// name.name
// name.name.name
// So it is between 1 and 5 tokens in length.
for i := l.lastPos + 1; i < len(l.tokens) && i < l.lastPos+7; i++ {
curToken := l.tokens[i].id
// An object name can only contain keyword/identifiers, and
// the punctuation '.'.
if curToken < 255 /* not ident/keyword */ && curToken != '.' && curToken != '@' {
// Definitely not object name.
break
}
if curToken == '@' {
if i == l.lastPos+1 {
/* The '@' cannot follow the INDEX keyword directly. */
break
}
atSignAfterObjectName = true
break
}
}
if atSignAfterObjectName {
lval.id = INDEX_AFTER_ORDER_BY_BEFORE_AT
}
}

case NOT, WITH, AS, GENERATED, NULLS, RESET, ROLE, USER, ON, TENANT, SET:
nextToken := sqlSymType{}
if l.lastPos+1 < len(l.tokens) {
Expand Down
Loading

0 comments on commit 1c02afc

Please sign in to comment.