Skip to content

Commit

Permalink
Merge #93757 #94122
Browse files Browse the repository at this point in the history
93757: trigram: support multi-byte string trigrams; perf improvements r=jordanlewis a=jordanlewis

Fixes #93744 
Related to #93830

- Add multi-byte character support
- Improve performance

```
name           old time/op    new time/op    delta
Similarity-32    1.72µs ± 0%    0.60µs ± 3%  -64.98%  (p=0.000 n=9+10)

name           old alloc/op   new alloc/op   delta
Similarity-32    1.32kB ± 0%    0.37kB ± 0%  -72.10%  (p=0.000 n=10+10)

name           old allocs/op  new allocs/op  delta
Similarity-32      15.0 ± 0%       6.0 ± 0%  -60.00%  (p=0.000 n=10+10)
```

Release note (sql change): previously, trigrams ignored multi-byte characters from input strings. This is now corrected.

94122: sql: implement the pg_timezone_names table r=rafiss a=otan

Informs #84505

Release note (sql change): Implement the `pg_timezone_names` pg_catalog table, which lists all supported timezones.

Co-authored-by: Jordan Lewis <[email protected]>
Co-authored-by: Oliver Tan <[email protected]>
  • Loading branch information
3 people committed Dec 23, 2022
3 parents b589379 + e4abe68 + e1be665 commit 4232883
Show file tree
Hide file tree
Showing 11 changed files with 220 additions and 35 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/logictest/testdata/logic_test/crdb_internal
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ pg_subscription_rel true
pg_tables false
pg_tablespace false
pg_timezone_abbrevs true
pg_timezone_names true
pg_timezone_names false
pg_transform true
pg_trigger true
pg_ts_config true
Expand Down
4 changes: 2 additions & 2 deletions pkg/sql/logictest/testdata/logic_test/crdb_internal_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ SELECT id, strip_volatile(descriptor) FROM crdb_internal.kv_catalog_descriptor
4294967009 {"table": {"columns": [{"id": 1, "name": "mapcfg", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 2, "name": "maptokentype", "nullable": true, "type": {"family": "IntFamily", "oid": 23, "width": 32}}, {"id": 3, "name": "mapseqno", "nullable": true, "type": {"family": "IntFamily", "oid": 23, "width": 32}}, {"id": 4, "name": "mapdict", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}], "formatVersion": 3, "id": 4294967009, "name": "pg_ts_config_map", "nextColumnId": 5, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": 32, "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967129, "version": "1"}}
4294967010 {"table": {"columns": [{"id": 1, "name": "oid", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 2, "name": "tgrelid", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 3, "name": "tgname", "nullable": true, "type": {"family": 11, "oid": 19}}, {"id": 4, "name": "tgfoid", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 5, "name": "tgtype", "nullable": true, "type": {"family": "IntFamily", "oid": 21, "width": 16}}, {"id": 6, "name": "tgenabled", "nullable": true, "type": {"family": "StringFamily", "oid": 18, "visibleType": 9, "width": 1}}, {"id": 7, "name": "tgisinternal", "nullable": true, "type": {"oid": 16}}, {"id": 8, "name": "tgconstrrelid", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 9, "name": "tgconstrindid", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 10, "name": "tgconstraint", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 11, "name": "tgdeferrable", "nullable": true, "type": {"oid": 16}}, {"id": 12, "name": "tginitdeferred", "nullable": true, "type": {"oid": 16}}, {"id": 13, "name": "tgnargs", "nullable": true, "type": {"family": "IntFamily", "oid": 21, "width": 16}}, {"id": 14, "name": "tgattr", "nullable": true, "type": {"arrayContents": {"family": "IntFamily", "oid": 21, "width": 16}, "arrayElemType": "IntFamily", "family": 200, "oid": 22, "width": 16}}, {"id": 15, "name": "tgargs", "nullable": true, "type": {"family": "BytesFamily", "oid": 17}}, {"id": 16, "name": "tgqual", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 17, "name": "tgoldtable", "nullable": true, "type": {"family": 11, "oid": 19}}, {"id": 18, "name": "tgnewtable", "nullable": true, "type": {"family": 11, "oid": 19}}, {"id": 19, "name": "tgparentid", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}], "formatVersion": 3, "id": 4294967010, "name": "pg_trigger", "nextColumnId": 20, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": 32, "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967129, "version": "1"}}
4294967011 {"table": {"columns": [{"id": 1, "name": "oid", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 2, "name": "trftype", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 3, "name": "trflang", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 4, "name": "trffromsql", "nullable": true, "type": {"family": "OidFamily", "oid": 24}}, {"id": 5, "name": "trftosql", "nullable": true, "type": {"family": "OidFamily", "oid": 24}}], "formatVersion": 3, "id": 4294967011, "name": "pg_transform", "nextColumnId": 6, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": 32, "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967129, "version": "1"}}
4294967012 {"table": {"columns": [{"id": 1, "name": "name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 2, "name": "abbrev", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "utc_offset", "nullable": true, "type": {"family": "IntervalFamily", "intervalDurationField": {}, "oid": 1186}}, {"id": 4, "name": "is_dst", "nullable": true, "type": {"oid": 16}}], "formatVersion": 3, "id": 4294967012, "name": "pg_timezone_names", "nextColumnId": 5, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": 32, "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967129, "version": "1"}}
4294967012 {"table": {"columns": [{"id": 1, "name": "name", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 2, "name": "abbrev", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 3, "name": "utc_offset", "nullable": true, "type": {"family": "IntervalFamily", "intervalDurationField": {}, "oid": 1186}}, {"id": 4, "name": "is_dst", "nullable": true, "type": {"oid": 16}}], "formatVersion": 3, "id": 4294967012, "indexes": [{"foreignKey": {}, "geoConfig": {}, "id": 2, "interleave": {}, "keyColumnDirections": ["ASC"], "keyColumnIds": [1], "keyColumnNames": ["name"], "name": "pg_timezone_names_name_idx", "partitioning": {}, "sharded": {}, "storeColumnIds": [2, 3, 4], "storeColumnNames": ["abbrev", "utc_offset", "is_dst"], "version": 3}], "name": "pg_timezone_names", "nextColumnId": 5, "nextConstraintId": 2, "nextIndexId": 3, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": 32, "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967129, "version": "1"}}
4294967013 {"table": {"columns": [{"id": 1, "name": "abbrev", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 2, "name": "utc_offset", "nullable": true, "type": {"family": "IntervalFamily", "intervalDurationField": {}, "oid": 1186}}, {"id": 3, "name": "is_dst", "nullable": true, "type": {"oid": 16}}], "formatVersion": 3, "id": 4294967013, "name": "pg_timezone_abbrevs", "nextColumnId": 4, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": 32, "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967129, "version": "1"}}
4294967014 {"table": {"columns": [{"id": 1, "name": "oid", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 2, "name": "spcname", "nullable": true, "type": {"family": 11, "oid": 19}}, {"id": 3, "name": "spcowner", "nullable": true, "type": {"family": "OidFamily", "oid": 26}}, {"id": 4, "name": "spclocation", "nullable": true, "type": {"family": "StringFamily", "oid": 25}}, {"id": 5, "name": "spcacl", "nullable": true, "type": {"arrayContents": {"family": "StringFamily", "oid": 25}, "arrayElemType": "StringFamily", "family": "ArrayFamily", "oid": 1009}}, {"id": 6, "name": "spcoptions", "nullable": true, "type": {"arrayContents": {"family": "StringFamily", "oid": 25}, "arrayElemType": "StringFamily", "family": "ArrayFamily", "oid": 1009}}], "formatVersion": 3, "id": 4294967014, "name": "pg_tablespace", "nextColumnId": 7, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": 32, "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967129, "version": "1"}}
4294967015 {"table": {"columns": [{"id": 1, "name": "schemaname", "nullable": true, "type": {"family": 11, "oid": 19}}, {"id": 2, "name": "tablename", "nullable": true, "type": {"family": 11, "oid": 19}}, {"id": 3, "name": "tableowner", "nullable": true, "type": {"family": 11, "oid": 19}}, {"id": 4, "name": "tablespace", "nullable": true, "type": {"family": 11, "oid": 19}}, {"id": 5, "name": "hasindexes", "nullable": true, "type": {"oid": 16}}, {"id": 6, "name": "hasrules", "nullable": true, "type": {"oid": 16}}, {"id": 7, "name": "hastriggers", "nullable": true, "type": {"oid": 16}}, {"id": 8, "name": "rowsecurity", "nullable": true, "type": {"oid": 16}}], "formatVersion": 3, "id": 4294967015, "name": "pg_tables", "nextColumnId": 9, "nextConstraintId": 2, "nextIndexId": 2, "nextMutationId": 1, "primaryIndex": {"constraintId": 1, "foreignKey": {}, "geoConfig": {}, "id": 1, "interleave": {}, "partitioning": {}, "sharded": {}}, "privileges": {"ownerProto": "node", "users": [{"privileges": 32, "userProto": "public"}], "version": 2}, "replacementOf": {"time": {}}, "unexposedParentSchemaId": 4294967129, "version": "1"}}
Expand Down Expand Up @@ -562,7 +562,7 @@ TableCommentType 4294967008 0 "pg_ts_config was created for compatibilit
TableCommentType 4294967009 0 "pg_ts_config_map was created for compatibility and is currently unimplemented"
TableCommentType 4294967010 0 "triggers (empty - feature does not exist)\nhttps://www.postgresql.org/docs/9.5/catalog-pg-trigger.html"
TableCommentType 4294967011 0 "pg_transform was created for compatibility and is currently unimplemented"
TableCommentType 4294967012 0 "pg_timezone_names was created for compatibility and is currently unimplemented"
TableCommentType 4294967012 0 "pg_timezone_names lists all the timezones that are supported by SET timezone"
TableCommentType 4294967013 0 "pg_timezone_abbrevs was created for compatibility and is currently unimplemented"
TableCommentType 4294967014 0 "available tablespaces (incomplete; concept inapplicable to CockroachDB)\nhttps://www.postgresql.org/docs/9.5/catalog-pg-tablespace.html"
TableCommentType 4294967015 0 "tables summary (see also information_schema.tables, pg_catalog.pg_class)\nhttps://www.postgresql.org/docs/9.5/view-pg-tables.html"
Expand Down
21 changes: 20 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/pg_catalog
Original file line number Diff line number Diff line change
Expand Up @@ -2424,7 +2424,7 @@ objoid classoid objsubid description
4294967009 4294967117 0 pg_ts_config_map was created for compatibility and is currently unimplemented
4294967010 4294967117 0 triggers (empty - feature does not exist)
4294967011 4294967117 0 pg_transform was created for compatibility and is currently unimplemented
4294967012 4294967117 0 pg_timezone_names was created for compatibility and is currently unimplemented
4294967012 4294967117 0 pg_timezone_names lists all the timezones that are supported by SET timezone
4294967013 4294967117 0 pg_timezone_abbrevs was created for compatibility and is currently unimplemented
4294967014 4294967117 0 available tablespaces (incomplete; concept inapplicable to CockroachDB)
4294967015 4294967117 0 tables summary (see also information_schema.tables, pg_catalog.pg_class)
Expand Down Expand Up @@ -4764,6 +4764,25 @@ WHERE
----
1

# pg_timezone_names

# Test using LIKE to scan the whole table.
# Use Honolulu as there is no DST.
query TTTB
SELECT * from pg_timezone_names WHERE name LIKE 'Pacific/Honolulu%'
----
Pacific/Honolulu HST -10:00:00 false

# Test with the virtual index.
query TTTB
SELECT * from pg_timezone_names WHERE name = 'Pacific/Honolulu'
----
Pacific/Honolulu HST -10:00:00 false

query TTTB
SELECT * from pg_timezone_names WHERE name = 'DoesNotExist'
----

# Regression test for incorrectly handling left anti virtual lookup joins
# (#88096).
statement ok
Expand Down
2 changes: 2 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/trigram_builtins
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ SELECT show_trgm(str) FROM (VALUES
('ab'),
('abc'),
('abcd'),
('Приветhi'),
(NULL)
) tbl(str)
----
Expand All @@ -13,6 +14,7 @@ SELECT show_trgm(str) FROM (VALUES
{" a"," ab","ab "}
{" a"," ab",abc,"bc "}
{" a"," ab",abc,bcd,"cd "}
{" п"," пр","hi ",вет,етh,иве,при,рив,тhi}
NULL

# Test that we sort the output trigrams.
Expand Down
28 changes: 27 additions & 1 deletion pkg/sql/logictest/testdata/logic_test/trigram_indexes
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ CREATE INDEX ON a USING GIST(t gist_trgm_ops)
statement ok
INSERT INTO a VALUES (1, 'foozoopa'),
(2, 'Foo'),
(3, 'blah')
(3, 'blah'),
(4, 'Приветhi')

query IT rowsort
SELECT * FROM a@a_t_idx WHERE t ILIKE '%Foo%'
Expand Down Expand Up @@ -84,6 +85,31 @@ query IT
SELECT * FROM a@a_t_idx WHERE t LIKE 'fblah'
----

query IT
SELECT * FROM a@a_t_idx WHERE t LIKE 'Приветhi'
----
4 Приветhi

query IT
SELECT * FROM a@a_t_idx WHERE t LIKE 'Привет%'
----
4 Приветhi

query IT
SELECT * FROM a@a_t_idx WHERE t LIKE 'Приве%'
----
4 Приветhi

query IT
SELECT * FROM a@a_t_idx WHERE t LIKE '%иве%'
----
4 Приветhi

query IT
SELECT * FROM a@a_t_idx WHERE t LIKE '%тhi%'
----
4 Приветhi

# Test the acceleration of the % similarity operator.
# By default, the threshold for searching is .3.
query FIT
Expand Down
39 changes: 37 additions & 2 deletions pkg/sql/pg_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,10 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sqlerrors"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/sql/vtable"
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/iterutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/errors"
"github.com/lib/pq/oid"
"golang.org/x/text/collate"
Expand Down Expand Up @@ -4118,12 +4120,45 @@ var pgCatalogStatioUserTablesTable = virtualSchemaTable{
}

var pgCatalogTimezoneNamesTable = virtualSchemaTable{
comment: "pg_timezone_names was created for compatibility and is currently unimplemented",
comment: "pg_timezone_names lists all the timezones that are supported by SET timezone",
schema: vtable.PgCatalogTimezoneNames,
populate: func(ctx context.Context, p *planner, _ catalog.DatabaseDescriptor, addRow func(...tree.Datum) error) error {
for _, tz := range timeutil.TimeZones() {
loc, err := timeutil.LoadLocation(tz)
if err != nil {
return err
}
if err := addRowForTimezoneNames(tz, p.extendedEvalCtx.StmtTimestamp.In(loc), addRow); err != nil {
return err
}
}
return nil
},
unimplemented: true,
indexes: []virtualIndex{
{
populate: func(ctx context.Context, unwrappedConstraint tree.Datum, p *planner, db catalog.DatabaseDescriptor,
addRow func(...tree.Datum) error,
) (bool, error) {
tz := string(tree.MustBeDString(unwrappedConstraint))
loc, err := timeutil.LoadLocation(tz)
if err != nil {
return false, nil //nolint:returnerrcheck
}
return true, addRowForTimezoneNames(tz, p.extendedEvalCtx.StmtTimestamp.In(loc), addRow)
},
},
},
}

func addRowForTimezoneNames(tz string, t time.Time, addRow func(...tree.Datum) error) error {
abbrev, offset := t.Zone()
utcOffsetInterval := duration.MakeDuration(int64(offset)*int64(time.Second), 0, 0)
return addRow(
tree.NewDString(tz), // name
tree.NewDString(abbrev), // abbrev
tree.NewDInterval(utcOffsetInterval, types.DefaultIntervalTypeMetadata), // utc_offset
tree.MakeDBool(tree.DBool(t.IsDST())), // is_dst
)
}

var pgCatalogTsDictTable = virtualSchemaTable{
Expand Down
9 changes: 5 additions & 4 deletions pkg/sql/pg_metadata_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,10 @@ var none = struct{}{}
var mappedPopulateFunctions = map[string]string{
// Currently pg_type cannot be found automatically by this code because it is
// not the populate function. Same for pg_proc.
"addPGTypeRow": "PGCatalogType",
"addPgProcUDFRow": "PGCatalogProc",
"addPgProcBuiltinRow": "PgCatalogProc",
"addPGTypeRow": "PGCatalogType",
"addPgProcUDFRow": "PGCatalogProc",
"addPgProcBuiltinRow": "PgCatalogProc",
"addRowForTimezoneNames": "PgCatalogTimezoneNames",
}

// schemaCodeFixer have specific configurations to fix the files with virtual
Expand Down Expand Up @@ -1580,7 +1581,7 @@ func validatePGCatalogCodeParser(t *testing.T) {
constants := readAllVTableConstants(t)
for _, vTableConstant := range constants {
if _, ok := pgCode.fixableTables[vTableConstant]; !ok {
t.Errorf("virtual table with constant %s cannot be fixed because could not found populate function", vTableConstant)
t.Errorf("virtual table with constant %s cannot be fixed because we could not find the populate function", vTableConstant)
}
}

Expand Down
5 changes: 3 additions & 2 deletions pkg/sql/vtable/pg_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -1828,13 +1828,14 @@ CREATE TABLE pg_catalog.pg_statio_user_tables (
tidx_blks_hit INT
)`

// PgCatalogTimezoneNames is an empty table in the pg_catalog that is not implemented yet
// PgCatalogTimezoneNames describes the schema of pg_catalog.pg_timezone_names.
const PgCatalogTimezoneNames = `
CREATE TABLE pg_catalog.pg_timezone_names (
name STRING,
abbrev STRING,
utc_offset INTERVAL,
is_dst BOOL
is_dst BOOL,
INDEX (name)
)`

// PgCatalogPartitionedTable is an empty table in the pg_catalog that is not implemented yet
Expand Down
17 changes: 17 additions & 0 deletions pkg/util/timeutil/zoneinfo.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
package timeutil

import (
"sort"
"strings"
"sync"
"time"
// embed tzdata in case system tzdata is not available.
_ "time/tzdata"
Expand Down Expand Up @@ -44,3 +46,18 @@ func LoadLocation(name string) (*time.Location, error) {
}
return time.LoadLocation(name)
}

var tzsOnce sync.Once
var tzs []string

// TimeZones lists all supported timezones.
func TimeZones() []string {
tzsOnce.Do(func() {
tzs = make([]string, 0, len(lowercaseTimezones))
for _, tz := range lowercaseTimezones {
tzs = append(tzs, tz)
}
sort.Strings(tzs)
})
return tzs
}
Loading

0 comments on commit 4232883

Please sign in to comment.