Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

builtins: add timestamp to crdb_internal.scan/list_sql_keys_in_range #90956

Merged
merged 1 commit into from
Nov 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions docs/generated/sql/functions.md
Original file line number Diff line number Diff line change
Expand Up @@ -1252,9 +1252,9 @@ the locality flag on node startup. Returns an error if no region is set.</p>
<tbody>
<tr><td><a name="aclexplode"></a><code>aclexplode(aclitems: <a href="string.html">string</a>[]) &rarr; tuple{oid AS grantor, oid AS grantee, string AS privilege_type, bool AS is_grantable}</code></td><td><span class="funcdesc"><p>Produces a virtual table containing aclitem stuff (returns no rows as this feature is unsupported in CockroachDB)</p>
</span></td><td>Stable</td></tr>
<tr><td><a name="crdb_internal.scan"></a><code>crdb_internal.scan(span: <a href="bytes.html">bytes</a>[]) &rarr; tuple{bytes AS key, bytes AS value}</code></td><td><span class="funcdesc"><p>Returns the raw keys and values from the specified span</p>
<tr><td><a name="crdb_internal.scan"></a><code>crdb_internal.scan(span: <a href="bytes.html">bytes</a>[]) &rarr; tuple{bytes AS key, bytes AS value, string AS ts}</code></td><td><span class="funcdesc"><p>Returns the raw keys and values from the specified span</p>
</span></td><td>Stable</td></tr>
<tr><td><a name="crdb_internal.scan"></a><code>crdb_internal.scan(start_key: <a href="bytes.html">bytes</a>, end_key: <a href="bytes.html">bytes</a>) &rarr; tuple{bytes AS key, bytes AS value}</code></td><td><span class="funcdesc"><p>Returns the raw keys and values from the specified span</p>
<tr><td><a name="crdb_internal.scan"></a><code>crdb_internal.scan(start_key: <a href="bytes.html">bytes</a>, end_key: <a href="bytes.html">bytes</a>) &rarr; tuple{bytes AS key, bytes AS value, string AS ts}</code></td><td><span class="funcdesc"><p>Returns the raw keys and values with their timestamp from the specified span</p>
</span></td><td>Stable</td></tr>
<tr><td><a name="crdb_internal.testing_callback"></a><code>crdb_internal.testing_callback(name: <a href="string.html">string</a>) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>For internal CRDB testing only. The function calls a callback identified by <code>name</code> registered with the server by the test.</p>
</span></td><td>Volatile</td></tr>
Expand Down Expand Up @@ -3081,7 +3081,7 @@ active for the current transaction.</p>
</span></td><td>Volatile</td></tr>
<tr><td><a name="crdb_internal.lease_holder"></a><code>crdb_internal.lease_holder(key: <a href="bytes.html">bytes</a>) &rarr; <a href="int.html">int</a></code></td><td><span class="funcdesc"><p>This function is used to fetch the leaseholder corresponding to a request key</p>
</span></td><td>Volatile</td></tr>
<tr><td><a name="crdb_internal.list_sql_keys_in_range"></a><code>crdb_internal.list_sql_keys_in_range(range_id: <a href="int.html">int</a>) &rarr; tuple{string AS key, string AS value}</code></td><td><span class="funcdesc"><p>Returns all SQL K/V pairs within the requested range.</p>
<tr><td><a name="crdb_internal.list_sql_keys_in_range"></a><code>crdb_internal.list_sql_keys_in_range(range_id: <a href="int.html">int</a>) &rarr; tuple{string AS key, string AS value, string AS ts}</code></td><td><span class="funcdesc"><p>Returns all SQL K/V pairs within the requested range.</p>
</span></td><td>Volatile</td></tr>
<tr><td><a name="crdb_internal.locality_value"></a><code>crdb_internal.locality_value(key: <a href="string.html">string</a>) &rarr; <a href="string.html">string</a></code></td><td><span class="funcdesc"><p>Returns the value of the specified locality key.</p>
</span></td><td>Stable</td></tr>
Expand Down
6 changes: 3 additions & 3 deletions pkg/sql/sem/builtins/fixed_oids.go
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ var builtinOidsBySignature = map[string]oid.Oid{
`crdb_internal.kv_set_queue_active(queue_name: string, active: bool) -> bool`: 1383,
`crdb_internal.kv_set_queue_active(queue_name: string, active: bool, store_id: int) -> bool`: 1384,
`crdb_internal.lease_holder(key: bytes) -> int`: 1316,
`crdb_internal.list_sql_keys_in_range(range_id: int) -> tuple{string AS key, string AS value}`: 348,
`crdb_internal.list_sql_keys_in_range(range_id: int) -> tuple{string AS key, string AS value, string AS ts}`: 348,
`crdb_internal.locality_value(key: string) -> string`: 1292,
`crdb_internal.merge_statement_stats(input: jsonb[]) -> jsonb`: 1277,
`crdb_internal.merge_stats_metadata(input: jsonb[]) -> jsonb`: 1279,
Expand Down Expand Up @@ -466,8 +466,8 @@ var builtinOidsBySignature = map[string]oid.Oid{
`crdb_internal.revalidate_unique_constraints_in_table(table_name: string) -> void`: 1380,
`crdb_internal.round_decimal_values(val: decimal, scale: int) -> decimal`: 1342,
`crdb_internal.round_decimal_values(val: decimal[], scale: int) -> decimal[]`: 1343,
`crdb_internal.scan(start_key: bytes, end_key: bytes) -> tuple{bytes AS key, bytes AS value}`: 315,
`crdb_internal.scan(span: bytes[]) -> tuple{bytes AS key, bytes AS value}`: 316,
`crdb_internal.scan(start_key: bytes, end_key: bytes) -> tuple{bytes AS key, bytes AS value, string AS ts}`: 315,
`crdb_internal.scan(span: bytes[]) -> tuple{bytes AS key, bytes AS value, string AS ts}`: 316,
`crdb_internal.schedule_sql_stats_compaction() -> bool`: 1377,
`crdb_internal.serialize_session() -> bytes`: 1370,
`crdb_internal.set_compaction_concurrency(node_id: int, store_id: int, compaction_concurrency: int) -> bool`: 1389,
Expand Down
14 changes: 8 additions & 6 deletions pkg/sql/sem/builtins/generator_builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ var generators = map[string]builtinDefinition{
EndKey: endKey,
}), nil
},
"Returns the raw keys and values from the specified span",
"Returns the raw keys and values with their timestamp from the specified span",
volatility.Stable,
),
makeGeneratorOverload(
Expand Down Expand Up @@ -2065,13 +2065,13 @@ const spanKeyIteratorChunkBytes = 8 << 20 // 8MiB
var rangeKeyIteratorType = types.MakeLabeledTuple(
// TODO(rohany): These could be bytes if we don't want to display the
// prettified versions of the key and value.
[]*types.T{types.String, types.String},
[]string{"key", "value"},
[]*types.T{types.String, types.String, types.String},
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder: Should we make this new column types.Decimal? That is the type of crdb_internal_mvcc_timestamp column is. I could go either way..

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In https://github.com/cockroachdb/cockroach/pull/90848/files#diff-ed2b0d994e60d34e9f853343fd43ee6e14ca234027786774a143e5b73b5a2698R111 im currently hashing the hlc.Timestamp.String() representation of an MVCC timestamp. As discussed offline I wanted uniformity between the output of this builtin and what we hash so that in the future we can write statements such as:

WITH sfp AS (SELECT xor_agg(fnv64(key, ts::BYTES, value)) AS scanfp from crdb_internal.scan(crdb_internal.table_span('t'::regclass::int)))
  SELECT count(*) FROM crdb_internal.fingerprint(crdb_internal.table_span('t'::regclass::int),'$before_insert'::TIMESTAMPTZ, false) AS efp, sfp WHERE efp = sfp.scanfp;

to test that our fingerprinting is working as expected. We could go either way here:

  1. hash the decimal representation of the timestamp and change scan to return a decimal instead
  2. hash the string representation of the timestamp and keep hashing behaviour what it is at the moment

In the interest of moving things ahead, I'll leave this as a string for now but while reviewing #90848 if we decide decimal is a better representation in both places, I'll make the switch there.

[]string{"key", "value", "ts"},
)

var spanKeyIteratorType = types.MakeLabeledTuple(
[]*types.T{types.Bytes, types.Bytes},
[]string{"key", "value"},
[]*types.T{types.Bytes, types.Bytes, types.String},
[]string{"key", "value", "ts"},
)

// spanKeyIterator is a ValueGenerator that iterates over all
Expand All @@ -2092,7 +2092,7 @@ type spanKeyIterator struct {
// index maintains the current position of the iterator in kvs.
index int
// A buffer to avoid allocating an array on every call to Values().
buf [2]tree.Datum
buf [3]tree.Datum
}

func newSpanKeyIterator(evalCtx *eval.Context, span roachpb.Span) *spanKeyIterator {
Expand Down Expand Up @@ -2166,6 +2166,7 @@ func (sp *spanKeyIterator) Values() (tree.Datums, error) {
kv := sp.kvs[sp.index]
sp.buf[0] = tree.NewDBytes(tree.DBytes(kv.Key))
sp.buf[1] = tree.NewDBytes(tree.DBytes(kv.Value.RawBytes))
sp.buf[2] = tree.NewDString(kv.Value.Timestamp.String())
return sp.buf[:], nil
}

Expand Down Expand Up @@ -2234,6 +2235,7 @@ func (rk *rangeKeyIterator) Values() (tree.Datums, error) {
kv := rk.kvs[rk.index]
rk.buf[0] = tree.NewDString(kv.Key.String())
rk.buf[1] = tree.NewDString(kv.Value.PrettyPrint())
rk.buf[2] = tree.NewDString(kv.Value.Timestamp.String())
return rk.buf[:], nil
}

Expand Down