-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
crdb_internal.go
4315 lines (4055 loc) · 144 KB
/
crdb_internal.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2017 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package sql
import (
"bytes"
"context"
"fmt"
"net"
"net/url"
"sort"
"strconv"
"strings"
"time"
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/build"
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/config"
"github.com/cockroachdb/cockroach/pkg/config/zonepb"
"github.com/cockroachdb/cockroach/pkg/gossip"
"github.com/cockroachdb/cockroach/pkg/jobs"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/kv/kvclient"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/liveness/livenesspb"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/security"
"github.com/cockroachdb/cockroach/pkg/server/serverpb"
"github.com/cockroachdb/cockroach/pkg/server/status/statuspb"
"github.com/cockroachdb/cockroach/pkg/server/telemetry"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catalogkv"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catconstants"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/catformat"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/colinfo"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/dbdesc"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/schemadesc"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/schemaexpr"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/tabledesc"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/typedesc"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/privilege"
"github.com/cockroachdb/cockroach/pkg/sql/roleoption"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
"github.com/cockroachdb/cockroach/pkg/sql/sem/builtins"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sessiondata"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/duration"
"github.com/cockroachdb/cockroach/pkg/util/errorutil"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/stop"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/tracing"
"github.com/cockroachdb/errors"
)
// CrdbInternalName is the name of the crdb_internal schema. It is defined as
// sessiondata.CRDBInternalSchemaName so that both identifiers always carry the
// same value.
const CrdbInternalName = sessiondata.CRDBInternalSchemaName
// crdbInternal is the virtual schema definition for crdb_internal. Its
// tableDefs map associates the fixed descriptor ID of every crdb_internal
// table or view with the definition that serves it.
//
// Naming convention:
// - if the response is served from memory, prefix with node_
// - if the response is served via a kv request, prefix with kv_
// - if the response is not from kv requests but is cluster-wide (i.e. the
// answer isn't specific to the sql connection being used), prefix with
// cluster_.
//
// Adding something new here will require an update to `pkg/cli` for inclusion in
// a `debug zip`; the unit tests will guide you.
//
// Many existing tables don't follow the conventions above, but please apply
// them to future additions.
var crdbInternal = virtualSchema{
name: CrdbInternalName,
tableDefs: map[descpb.ID]virtualSchemaDef{
catconstants.CrdbInternalBackwardDependenciesTableID: crdbInternalBackwardDependenciesTable,
catconstants.CrdbInternalBuildInfoTableID: crdbInternalBuildInfoTable,
catconstants.CrdbInternalBuiltinFunctionsTableID: crdbInternalBuiltinFunctionsTable,
catconstants.CrdbInternalClusterContentionEventsTableID: crdbInternalClusterContentionEventsTable,
catconstants.CrdbInternalClusterQueriesTableID: crdbInternalClusterQueriesTable,
catconstants.CrdbInternalClusterTransactionsTableID: crdbInternalClusterTxnsTable,
catconstants.CrdbInternalClusterSessionsTableID: crdbInternalClusterSessionsTable,
catconstants.CrdbInternalClusterSettingsTableID: crdbInternalClusterSettingsTable,
catconstants.CrdbInternalCreateStmtsTableID: crdbInternalCreateStmtsTable,
catconstants.CrdbInternalCreateTypeStmtsTableID: crdbInternalCreateTypeStmtsTable,
catconstants.CrdbInternalDatabasesTableID: crdbInternalDatabasesTable,
catconstants.CrdbInternalFeatureUsageID: crdbInternalFeatureUsage,
catconstants.CrdbInternalForwardDependenciesTableID: crdbInternalForwardDependenciesTable,
catconstants.CrdbInternalGossipNodesTableID: crdbInternalGossipNodesTable,
catconstants.CrdbInternalGossipAlertsTableID: crdbInternalGossipAlertsTable,
catconstants.CrdbInternalGossipLivenessTableID: crdbInternalGossipLivenessTable,
catconstants.CrdbInternalGossipNetworkTableID: crdbInternalGossipNetworkTable,
catconstants.CrdbInternalIndexColumnsTableID: crdbInternalIndexColumnsTable,
catconstants.CrdbInternalInflightTraceSpanTableID: crdbInternalInflightTraceSpanTable,
catconstants.CrdbInternalJobsTableID: crdbInternalJobsTable,
catconstants.CrdbInternalKVNodeStatusTableID: crdbInternalKVNodeStatusTable,
catconstants.CrdbInternalKVStoreStatusTableID: crdbInternalKVStoreStatusTable,
catconstants.CrdbInternalLeasesTableID: crdbInternalLeasesTable,
catconstants.CrdbInternalLocalContentionEventsTableID: crdbInternalLocalContentionEventsTable,
catconstants.CrdbInternalLocalQueriesTableID: crdbInternalLocalQueriesTable,
catconstants.CrdbInternalLocalTransactionsTableID: crdbInternalLocalTxnsTable,
catconstants.CrdbInternalLocalSessionsTableID: crdbInternalLocalSessionsTable,
catconstants.CrdbInternalLocalMetricsTableID: crdbInternalLocalMetricsTable,
catconstants.CrdbInternalPartitionsTableID: crdbInternalPartitionsTable,
catconstants.CrdbInternalPredefinedCommentsTableID: crdbInternalPredefinedCommentsTable,
catconstants.CrdbInternalRangesNoLeasesTableID: crdbInternalRangesNoLeasesTable,
catconstants.CrdbInternalRangesViewID: crdbInternalRangesView,
catconstants.CrdbInternalRuntimeInfoTableID: crdbInternalRuntimeInfoTable,
catconstants.CrdbInternalSchemaChangesTableID: crdbInternalSchemaChangesTable,
catconstants.CrdbInternalSessionTraceTableID: crdbInternalSessionTraceTable,
catconstants.CrdbInternalSessionVariablesTableID: crdbInternalSessionVariablesTable,
catconstants.CrdbInternalStmtStatsTableID: crdbInternalStmtStatsTable,
catconstants.CrdbInternalTableColumnsTableID: crdbInternalTableColumnsTable,
catconstants.CrdbInternalTableIndexesTableID: crdbInternalTableIndexesTable,
catconstants.CrdbInternalTablesTableLastStatsID: crdbInternalTablesTableLastStats,
catconstants.CrdbInternalTablesTableID: crdbInternalTablesTable,
catconstants.CrdbInternalTransactionStatsTableID: crdbInternalTransactionStatisticsTable,
catconstants.CrdbInternalTxnStatsTableID: crdbInternalTxnStatsTable,
catconstants.CrdbInternalZonesTableID: crdbInternalZonesTable,
catconstants.CrdbInternalInvalidDescriptorsTableID: crdbInternalInvalidDescriptorsTable,
catconstants.CrdbInternalClusterDatabasePrivilegesTableID: crdbInternalClusterDatabasePrivilegesTable,
catconstants.CrdbInternalInterleaved: crdbInternalInterleaved,
catconstants.CrdbInternalCrossDbRefrences: crdbInternalCrossDbReferences,
},
validWithNoDatabaseContext: true,
}
// crdbInternalBuildInfoTable backs crdb_internal.node_build_info. It emits one
// row per identification field (product name, cluster ID, organization and
// build details) for the node that serves the query.
var crdbInternalBuildInfoTable = virtualSchemaTable{
	comment: `detailed identification strings (RAM, local node only)`,
	schema: `
CREATE TABLE crdb_internal.node_build_info (
node_id INT NOT NULL,
field STRING NOT NULL,
value STRING NOT NULL
)`,
	populate: func(_ context.Context, p *planner, _ *dbdesc.Immutable, addRow func(...tree.Datum) error) error {
		execCfg := p.ExecCfg()
		nodeID, _ := execCfg.NodeID.OptionalNodeID() // zero if not available

		info := build.GetInfo()
		// The fields are kept in an ordered list rather than a map: Go
		// randomizes map iteration order, which would make the row order of
		// this table differ from one query to the next.
		for _, kv := range [][2]string{
			{"Name", "CockroachDB"},
			{"ClusterID", execCfg.ClusterID().String()},
			{"Organization", execCfg.Organization()},
			{"Build", info.Short()},
			{"Version", info.Tag},
			{"Channel", info.Channel},
		} {
			k, v := kv[0], kv[1]
			if err := addRow(
				tree.NewDInt(tree.DInt(nodeID)),
				tree.NewDString(k),
				tree.NewDString(v),
			); err != nil {
				return err
			}
		}
		return nil
	},
}
// crdbInternalRuntimeInfoTable backs crdb_internal.node_runtime_info. For the
// SQL ("DB") and admin UI ("UI") endpoints of the local node it emits one row
// per URL part: the full URL, scheme, user, host, port and request URI. The
// table is restricted to users with the admin role.
var crdbInternalRuntimeInfoTable = virtualSchemaTable{
	comment: `server parameters, useful to construct connection URLs (RAM, local node only)`,
	schema: `
CREATE TABLE crdb_internal.node_runtime_info (
node_id INT NOT NULL,
component STRING NOT NULL,
field STRING NOT NULL,
value STRING NOT NULL
)`,
	populate: func(ctx context.Context, p *planner, _ *dbdesc.Immutable, addRow func(...tree.Datum) error) error {
		if err := p.RequireAdminRole(ctx, "access the node runtime information"); err != nil {
			return err
		}

		nodeInfo := p.ExecCfg().NodeInfo
		nodeID, _ := nodeInfo.NodeID.OptionalNodeID() // zero if not available

		sqlURL, err := nodeInfo.PGURL(url.User(security.RootUser))
		if err != nil {
			return err
		}

		// endpoint pairs a component label with the URL it is reachable at.
		type endpoint struct {
			component string
			target    *url.URL
		}
		endpoints := []endpoint{
			{component: "DB", target: sqlURL},
			{component: "UI", target: nodeInfo.AdminURL()},
		}

		for _, ep := range endpoints {
			// The user part is optional in a URL; report it as empty if unset.
			userInfo := ""
			if u := ep.target.User; u != nil {
				userInfo = u.String()
			}

			host, port, err := net.SplitHostPort(ep.target.Host)
			if err != nil {
				return err
			}

			parts := [...]struct{ field, value string }{
				{"URL", ep.target.String()},
				{"Scheme", ep.target.Scheme},
				{"User", userInfo},
				{"Host", host},
				{"Port", port},
				{"URI", ep.target.RequestURI()},
			}
			for _, part := range parts {
				if err := addRow(
					tree.NewDInt(tree.DInt(nodeID)),
					tree.NewDString(ep.component),
					tree.NewDString(part.field),
					tree.NewDString(part.value),
				); err != nil {
					return err
				}
			}
		}
		return nil
	},
}
// crdbInternalDatabasesTable backs crdb_internal.databases. It emits one row
// per database visited by forEachDatabaseDesc. For multi-region databases the
// primary region, the list of regions and the survival goal are filled in;
// otherwise primary_region and survival_goal are NULL and regions is an empty
// array.
var crdbInternalDatabasesTable = virtualSchemaTable{
	comment: `databases accessible by the current user (KV scan)`,
	schema: `
CREATE TABLE crdb_internal.databases (
id INT NOT NULL,
name STRING NOT NULL,
owner NAME NOT NULL,
primary_region STRING,
regions STRING[],
survival_goal STRING
)`,
	populate: func(ctx context.Context, p *planner, _ *dbdesc.Immutable, addRow func(...tree.Datum) error) error {
		emitDatabase := func(db *dbdesc.Immutable) error {
			regionList := tree.NewDArray(types.String)
			primary, goal := tree.DNull, tree.DNull

			if db.IsMultiRegion() {
				// Translate the survival goal enum into its user-facing name;
				// an unrecognized value is reported as an error.
				if g := db.RegionConfig.SurvivalGoal; g == descpb.SurvivalGoal_ZONE_FAILURE {
					goal = tree.NewDString("zone")
				} else if g == descpb.SurvivalGoal_REGION_FAILURE {
					goal = tree.NewDString("region")
				} else {
					return errors.Newf("unknown survival goal: %d", db.RegionConfig.SurvivalGoal)
				}

				primary = tree.NewDString(string(db.RegionConfig.PrimaryRegion))

				for _, r := range db.RegionConfig.Regions {
					regionName := tree.NewDString(string(r.Name))
					if err := regionList.Append(regionName); err != nil {
						return err
					}
				}
			}

			idDatum := tree.NewDInt(tree.DInt(db.GetID()))
			nameDatum := tree.NewDString(db.GetName())
			ownerDatum := tree.NewDName(getOwnerOfDesc(db).Normalized())
			return addRow(
				idDatum,    // id
				nameDatum,  // name
				ownerDatum, // owner
				primary,    // primary_region
				regionList, // regions
				goal,       // survival_goal
			)
		}
		return forEachDatabaseDesc(
			ctx, p, nil /* all databases */, true /* requiresPrivileges */, emitDatabase,
		)
	},
}
// crdbInternalTablesTable backs crdb_internal.tables. It emits one row per
// table descriptor on which the current user holds any privilege -- including
// descriptors that are being added or dropped -- followed by one row per
// virtual table.
//
// TODO(tbg): prefix with kv_.
var crdbInternalTablesTable = virtualSchemaTable{
	comment: `table descriptors accessible by current user, including non-public and virtual (KV scan; expensive!)`,
	schema: `
CREATE TABLE crdb_internal.tables (
table_id INT NOT NULL,
parent_id INT NOT NULL,
name STRING NOT NULL,
database_name STRING,
version INT NOT NULL,
mod_time TIMESTAMP NOT NULL,
mod_time_logical DECIMAL NOT NULL,
format_version STRING NOT NULL,
state STRING NOT NULL,
sc_lease_node_id INT,
sc_lease_expiration_time TIMESTAMP,
drop_time TIMESTAMP,
audit_mode STRING NOT NULL,
schema_name STRING NOT NULL,
parent_schema_id INT NOT NULL,
locality TEXT
)`,
	generator: func(ctx context.Context, p *planner, dbDesc *dbdesc.Immutable, stopper *stop.Stopper) (virtualTableGenerator, cleanupFunc, error) {
		// Scratch row reused for every pushed row. Its capacity matches the 16
		// columns of the schema above so that filling it never reallocates.
		row := make(tree.Datums, 0, 16)
		worker := func(pusher rowPusher) error {
			descs, err := p.Descriptors().GetAllDescriptors(ctx, p.txn)
			if err != nil {
				return err
			}
			dbNames := make(map[descpb.ID]string)
			scNames := make(map[descpb.ID]string)
			scNames[keys.PublicSchemaID] = sessiondata.PublicSchemaName
			// Record database and schema descriptors for name lookups.
			for _, desc := range descs {
				if db, ok := desc.(*dbdesc.Immutable); ok {
					dbNames[db.GetID()] = db.GetName()
				}
				if sc, ok := desc.(*schemadesc.Immutable); ok {
					scNames[sc.GetID()] = sc.GetName()
				}
			}

			// addDesc builds the row for one table descriptor and pushes it.
			// dbName is passed as a datum so that virtual tables can report
			// NULL for database_name.
			addDesc := func(table catalog.TableDescriptor, dbName tree.Datum, scName string) error {
				leaseNodeDatum := tree.DNull
				leaseExpDatum := tree.DNull
				if lease := table.GetLease(); lease != nil {
					leaseNodeDatum = tree.NewDInt(tree.DInt(int64(lease.NodeID)))
					ts, err := tree.MakeDTimestamp(
						timeutil.Unix(0, lease.ExpirationTime), time.Nanosecond,
					)
					if err != nil {
						return err
					}
					leaseExpDatum = ts
				}
				// A zero drop time is reported as NULL.
				dropTimeDatum := tree.DNull
				if dropTime := table.GetDropTime(); dropTime != 0 {
					ts, err := tree.MakeDTimestamp(
						timeutil.Unix(0, dropTime), time.Nanosecond,
					)
					if err != nil {
						return err
					}
					dropTimeDatum = ts
				}
				// The locality column is only set for tables that carry a
				// locality config.
				locality := tree.DNull
				if c := table.GetLocalityConfig(); c != nil {
					f := tree.NewFmtCtx(tree.FmtSimple)
					if err := tabledesc.FormatTableLocalityConfig(c, f); err != nil {
						return err
					}
					locality = tree.NewDString(f.String())
				}
				row = row[:0]
				row = append(row,
					tree.NewDInt(tree.DInt(int64(table.GetID()))),
					tree.NewDInt(tree.DInt(int64(table.GetParentID()))),
					tree.NewDString(table.GetName()),
					dbName,
					tree.NewDInt(tree.DInt(int64(table.GetVersion()))),
					tree.TimestampToInexactDTimestamp(table.GetModificationTime()),
					tree.TimestampToDecimalDatum(table.GetModificationTime()),
					tree.NewDString(table.GetFormatVersion().String()),
					tree.NewDString(table.GetState().String()),
					leaseNodeDatum,
					leaseExpDatum,
					dropTimeDatum,
					tree.NewDString(table.GetAuditMode().String()),
					tree.NewDString(scName),
					tree.NewDInt(tree.DInt(int64(table.GetParentSchemaID()))),
					locality,
				)
				return pusher.pushRow(row...)
			}

			// Note: we do not use forEachTableDesc() here because we want to
			// include added and dropped descriptors.
			for _, desc := range descs {
				table, ok := desc.(catalog.TableDescriptor)
				if !ok || p.CheckAnyPrivilege(ctx, table) != nil {
					continue
				}
				dbName := dbNames[table.GetParentID()]
				if dbName == "" {
					// The parent database was deleted. This is possible e.g. when
					// a database is dropped with CASCADE, and someone queries
					// this virtual table before the dropped table descriptors are
					// effectively deleted.
					dbName = fmt.Sprintf("[%d]", table.GetParentID())
				}
				schemaName := scNames[table.GetParentSchemaID()]
				if schemaName == "" {
					// The parent schema was deleted, possibly due to reasons mentioned above.
					schemaName = fmt.Sprintf("[%d]", table.GetParentSchemaID())
				}
				if err := addDesc(table, tree.NewDString(dbName), schemaName); err != nil {
					return err
				}
			}

			// Also add all the virtual descriptors.
			vt := p.getVirtualTabler()
			vEntries := vt.getEntries()
			for _, virtSchemaName := range vt.getSchemaNames() {
				e := vEntries[virtSchemaName]
				for _, tName := range e.orderedDefNames {
					vTableEntry := e.defs[tName]
					if err := addDesc(vTableEntry.desc, tree.DNull, virtSchemaName); err != nil {
						return err
					}
				}
			}
			return nil
		}
		return setupGenerator(ctx, worker, stopper)
	},
}
// statsAsOfTimeClusterMode is the duration cluster setting that supplies the
// AS OF SYSTEM TIME value used when querying the system.table_statistics
// table to build crdb_internal.table_row_statistics. Its default is -10s,
// i.e. statistics are read as of ten seconds in the past.
var statsAsOfTimeClusterMode = settings.RegisterDurationSetting(
"sql.crdb_internal.table_row_statistics.as_of_time",
"historical query time used to build the crdb_internal.table_row_statistics table",
-10*time.Second,
)
// crdbInternalTablesTableLastStats backs crdb_internal.table_row_statistics.
// It reads the most recent row count per table from system.table_statistics
// at a historical timestamp and then emits one row per table visited by
// forEachTableDescAll. Virtual tables report a NULL row count; other tables
// without collected statistics report 0.
var crdbInternalTablesTableLastStats = virtualSchemaTable{
	comment: "stats for all tables accessible by current user in current database as of 10s ago",
	schema: `
CREATE TABLE crdb_internal.table_row_statistics (
table_id INT NOT NULL,
table_name STRING NOT NULL,
estimated_row_count INT
)`,
	populate: func(ctx context.Context, p *planner, db *dbdesc.Immutable, addRow func(...tree.Datum) error) error {
		// The statistics are read at a historical timestamp (10 seconds ago by
		// default) to avoid contention on the stats table.
		asOf := statsAsOfTimeClusterMode.String(&p.ExecCfg().Settings.SV)
		query := fmt.Sprintf(`
SELECT s."tableID", max(s."rowCount")
FROM system.table_statistics AS s
JOIN (
SELECT "tableID", max("createdAt") AS last_dt
FROM system.table_statistics
GROUP BY "tableID"
) AS l ON l."tableID" = s."tableID" AND l.last_dt = s."createdAt"
AS OF SYSTEM TIME '%s'
GROUP BY s."tableID"`, asOf)

		// A nil transaction is passed on purpose so that the AS OF clause is
		// independent of any outer query.
		executor := p.ExtendedEvalContext().ExecCfg.InternalExecutor
		rows, err := executor.QueryBufferedEx(
			ctx, "crdb-internal-statistics-table", nil,
			sessiondata.InternalExecutorOverride{User: security.RootUserName()},
			query)
		switch {
		case err == nil:
			// Fall through to building the result.
		case errors.Is(err, catalog.ErrDescriptorNotFound):
			// SHOW TABLES run less than 10 seconds after cluster startup (10s
			// is the default AS OF time) makes this query fail with
			// "descriptor not found". That is tolerated: report no rows.
			return nil
		default:
			return err
		}

		// Index the collected statistics: tableID -> rowCount.
		rowCounts := make(map[tree.DInt]tree.Datum, len(rows))
		for i := range rows {
			rowCounts[tree.MustBeDInt(rows[i][0])] = rows[i][1]
		}

		// Visit every available table and report its row count from the
		// collected statistics.
		emitTable := func(_ *dbdesc.Immutable, _ string, table catalog.TableDescriptor) error {
			id := tree.DInt(table.GetID())

			var estimate tree.Datum
			switch cnt, found := rowCounts[id]; {
			case table.IsVirtualTable():
				// Virtual tables have no statistics.
				estimate = tree.DNull
			case found:
				estimate = cnt
			default:
				estimate = tree.NewDInt(0)
			}

			return addRow(
				tree.NewDInt(id),
				tree.NewDString(table.GetName()),
				estimate,
			)
		}
		return forEachTableDescAll(ctx, p, db, virtualMany, emitTable)
	},
}
// crdbInternalSchemaChangesTable backs crdb_internal.schema_changes. For each
// table descriptor on which the current user holds any privilege it emits one
// row per pending mutation, describing the mutation's kind, target, state and
// direction.
//
// TODO(tbg): prefix with kv_.
var crdbInternalSchemaChangesTable = virtualSchemaTable{
	comment: `ongoing schema changes, across all descriptors accessible by current user (KV scan; expensive!)`,
	schema: `
CREATE TABLE crdb_internal.schema_changes (
table_id INT NOT NULL,
parent_id INT NOT NULL,
name STRING NOT NULL,
type STRING NOT NULL,
target_id INT,
target_name STRING,
state STRING NOT NULL,
direction STRING NOT NULL
)`,
	populate: func(ctx context.Context, p *planner, _ *dbdesc.Immutable, addRow func(...tree.Datum) error) error {
		allDescs, err := p.Descriptors().GetAllDescriptors(ctx, p.txn)
		if err != nil {
			return err
		}

		// forEachTableDesc() is deliberately not used here: descriptors that
		// are being added or dropped must be included as well.
		for _, desc := range allDescs {
			tbl, isTable := desc.(catalog.TableDescriptor)
			if !isTable {
				continue
			}
			if p.CheckAnyPrivilege(ctx, tbl) != nil {
				continue
			}

			// These datums are identical for every mutation of the table.
			var (
				idDatum     = tree.NewDInt(tree.DInt(int64(tbl.GetID())))
				parentDatum = tree.NewDInt(tree.DInt(int64(tbl.GetParentID())))
				nameDatum   = tree.NewDString(tbl.GetName())
			)

			for _, m := range tbl.TableDesc().Mutations {
				// Mutations of an unrecognized kind are still reported, with a
				// placeholder type and NULL target columns.
				kind := "UNKNOWN"
				targetID, targetName := tree.DNull, tree.DNull

				switch target := m.Descriptor_.(type) {
				case *descpb.DescriptorMutation_Column:
					kind = "COLUMN"
					targetID = tree.NewDInt(tree.DInt(int64(target.Column.ID)))
					targetName = tree.NewDString(target.Column.Name)
				case *descpb.DescriptorMutation_Index:
					kind = "INDEX"
					targetID = tree.NewDInt(tree.DInt(int64(target.Index.ID)))
					targetName = tree.NewDString(target.Index.Name)
				case *descpb.DescriptorMutation_Constraint:
					// Constraints only report a name; target_id stays NULL.
					kind = "CONSTRAINT VALIDATION"
					targetName = tree.NewDString(target.Constraint.Name)
				}

				rowErr := addRow(
					idDatum,
					parentDatum,
					nameDatum,
					tree.NewDString(kind),
					targetID,
					targetName,
					tree.NewDString(m.State.String()),
					tree.NewDString(m.Direction.String()),
				)
				if rowErr != nil {
					return rowErr
				}
			}
		}
		return nil
	},
}
// crdbInternalLeasesTable backs crdb_internal.leases. It walks the leases
// known to the local lease manager and emits one row for each descriptor on
// which the current user holds any privilege.
//
// TODO(tbg): prefix with node_.
var crdbInternalLeasesTable = virtualSchemaTable{
	comment: `acquired table leases (RAM; local node only)`,
	schema: `
CREATE TABLE crdb_internal.leases (
node_id INT NOT NULL,
table_id INT NOT NULL,
name STRING NOT NULL,
parent_id INT NOT NULL,
expiration TIMESTAMP NOT NULL,
deleted BOOL NOT NULL
)`,
	populate: func(
		ctx context.Context, p *planner, _ *dbdesc.Immutable, addRow func(...tree.Datum) error,
	) error {
		nodeID, _ := p.execCfg.NodeID.OptionalNodeID() // zero if not available

		// rowErr records the first addRow failure; the visitor stops the walk
		// as soon as it is set.
		var rowErr error
		visit := func(desc catalog.Descriptor, takenOffline bool, _ int, expiration tree.DTimestamp) bool {
			if privErr := p.CheckAnyPrivilege(ctx, desc); privErr != nil {
				// TODO(ajwerner): inspect what type of error got returned.
				return true
			}
			rowErr = addRow(
				tree.NewDInt(tree.DInt(nodeID)),
				tree.NewDInt(tree.DInt(int64(desc.GetID()))),
				tree.NewDString(desc.GetName()),
				tree.NewDInt(tree.DInt(int64(desc.GetParentID()))),
				&expiration,
				tree.MakeDBool(tree.DBool(takenOffline)),
			)
			return rowErr == nil
		}
		p.LeaseMgr().VisitLeases(visit)
		return rowErr
	},
}
// tsOrNull converts a count of microseconds since the Unix epoch into a
// timestamp datum with microsecond precision. A zero input yields NULL.
func tsOrNull(micros int64) (tree.Datum, error) {
	if micros != 0 {
		nanos := micros * time.Microsecond.Nanoseconds()
		return tree.MakeDTimestamp(timeutil.Unix(0, nanos), time.Microsecond)
	}
	return tree.DNull, nil
}
// crdbInternalJobsTable backs crdb_internal.jobs. Its generator iterates over
// the rows of system.jobs (read as the root user), decodes the payload and
// progress protobufs of each job, and yields one row per job the current user
// is allowed to see. Decoding failures do not abort the scan; they are
// reported in the row's error column instead.
//
// TODO(tbg): prefix with kv_.
var crdbInternalJobsTable = virtualSchemaTable{
schema: `
CREATE TABLE crdb_internal.jobs (
job_id INT,
job_type STRING,
description STRING,
statement STRING,
user_name STRING,
descriptor_ids INT[],
status STRING,
running_status STRING,
created TIMESTAMP,
started TIMESTAMP,
finished TIMESTAMP,
modified TIMESTAMP,
fraction_completed FLOAT,
high_water_timestamp DECIMAL,
error STRING,
coordinator_id INT
)`,
comment: `decoded job metadata from system.jobs (KV scan)`,
generator: func(ctx context.Context, p *planner, _ *dbdesc.Immutable, _ *stop.Stopper) (virtualTableGenerator, cleanupFunc, error) {
// Resolve the caller's identity and privileges once, up front; they drive
// the per-row visibility check below.
currentUser := p.SessionData().User()
isAdmin, err := p.HasAdminRole(ctx)
if err != nil {
return nil, nil, err
}
hasControlJob, err := p.HasRoleOption(ctx, roleoption.CONTROLJOB)
if err != nil {
return nil, nil, err
}
// Beware: we're querying system.jobs as root; we need to be careful to filter
// out results that the current user is not able to see.
query := `SELECT id, status, created, payload, progress FROM system.jobs`
it, err := p.ExtendedEvalContext().ExecCfg.InternalExecutor.QueryIteratorEx(
ctx, "crdb-internal-jobs-table", p.txn,
sessiondata.InternalExecutorOverride{User: security.RootUserName()},
query)
if err != nil {
return nil, nil, err
}
// cleanup closes the iterator; it is returned to the caller as the
// generator's cleanup function.
cleanup := func() {
if err := it.Close(); err != nil {
// TODO(yuzefovich): this error should be propagated further up
// and not simply being logged. Fix it (#61123).
//
// Doing that as a return parameter would require changes to
// `planNode.Close` signature which is a bit annoying. One other
// possible solution is to panic here and catch the error
// somewhere.
log.Warningf(ctx, "error closing an iterator: %v", err)
}
}
// We'll reuse this container on each loop. Note that the slice returned by
// the generator is therefore overwritten by the next call.
container := make(tree.Datums, 0, 16)
return func() (datums tree.Datums, e error) {
// Loop while we need to skip a row.
for {
// A false ok means the iterator is done: either it is exhausted (err is
// nil) or it failed (err is returned to the caller).
ok, err := it.Next(ctx)
if !ok {
return nil, err
}
r := it.Cur()
id, status, created, payloadBytes, progressBytes := r[0], r[1], r[2], r[3], r[4]
// All derived columns start out as NULL and are filled in as decoding
// succeeds.
var jobType, description, statement, username, descriptorIDs, started, runningStatus,
finished, modified, fractionCompleted, highWaterTimestamp, errorStr, leaseNode = tree.DNull,
tree.DNull, tree.DNull, tree.DNull, tree.DNull, tree.DNull, tree.DNull, tree.DNull,
tree.DNull, tree.DNull, tree.DNull, tree.DNull, tree.DNull
// Extract data from the payload.
payload, err := jobs.UnmarshalPayload(payloadBytes)
// We filter out masked rows before we allocate all the
// datums, to avoid needless allocations.
ownedByAdmin := false
var sqlUsername security.SQLUsername
if payload != nil {
sqlUsername = payload.UsernameProto.Decode()
// NOTE(review): this overwrites err from UnmarshalPayload. A failure of
// the admin-role lookup is therefore reported as "error decoding
// payload" and also routes the row into the error branch further down,
// so the payload fields are left NULL even though the payload decoded.
// Confirm whether that is intended.
ownedByAdmin, err = p.UserHasAdminRole(ctx, sqlUsername)
if err != nil {
errorStr = tree.NewDString(fmt.Sprintf("error decoding payload: %v", err))
}
}
sameUser := payload != nil && sqlUsername == currentUser
// The user can access the row if they meet one of the conditions:
// 1. The user is an admin.
// 2. The job is owned by the user.
// 3. The user has CONTROLJOB privilege and the job is not owned by
// an admin.
//
// When the payload is nil, ownedByAdmin and sameUser are both false,
// so the row is shown only to admins and CONTROLJOB holders.
if canAccess := isAdmin || !ownedByAdmin && hasControlJob || sameUser; !canAccess {
continue
}
if err != nil {
errorStr = tree.NewDString(fmt.Sprintf("error decoding payload: %v", err))
} else {
jobType = tree.NewDString(payload.Type().String())
description = tree.NewDString(payload.Description)
statement = tree.NewDString(payload.Statement)
username = tree.NewDString(sqlUsername.Normalized())
descriptorIDsArr := tree.NewDArray(types.Int)
for _, descID := range payload.DescriptorIDs {
if err := descriptorIDsArr.Append(tree.NewDInt(tree.DInt(int(descID)))); err != nil {
return nil, err
}
}
descriptorIDs = descriptorIDsArr
// Zero timestamps are reported as NULL (see tsOrNull).
started, err = tsOrNull(payload.StartedMicros)
if err != nil {
return nil, err
}
finished, err = tsOrNull(payload.FinishedMicros)
if err != nil {
return nil, err
}
if payload.Lease != nil {
leaseNode = tree.NewDInt(tree.DInt(payload.Lease.NodeID))
}
errorStr = tree.NewDString(payload.Error)
}
// Extract data from the progress field.
if progressBytes != tree.DNull {
progress, err := jobs.UnmarshalProgress(progressBytes)
if err != nil {
// Append the progress decoding error to whatever error text was
// already recorded for this row, separated by a newline.
baseErr := ""
if s, ok := errorStr.(*tree.DString); ok {
baseErr = string(*s)
if baseErr != "" {
baseErr += "\n"
}
}
errorStr = tree.NewDString(fmt.Sprintf("%serror decoding progress: %v", baseErr, err))
} else {
// Progress contains either fractionCompleted for traditional jobs,
// or the highWaterTimestamp for change feeds.
if highwater := progress.GetHighWater(); highwater != nil {
highWaterTimestamp = tree.TimestampToDecimalDatum(*highwater)
} else {
fractionCompleted = tree.NewDFloat(tree.DFloat(progress.GetFractionCompleted()))
}
modified, err = tsOrNull(progress.ModifiedMicros)
if err != nil {
return nil, err
}
// The running status is only surfaced while the job's status is
// "running".
if len(progress.RunningStatus) > 0 {
if s, ok := status.(*tree.DString); ok {
if jobs.Status(string(*s)) == jobs.StatusRunning {
runningStatus = tree.NewDString(progress.RunningStatus)
}
}
}
}
}
// Assemble the row in schema column order.
container = container[:0]
container = append(container,
id,
jobType,
description,
statement,
username,
descriptorIDs,
status,
runningStatus,
created,
started,
finished,
modified,
fractionCompleted,
highWaterTimestamp,
errorStr,
leaseNode,
)
return container, nil
}
}, cleanup, nil
},
}
// stmtList is a slice of statement keys that implements sort.Interface,
// ordering its entries by anonymized statement text.
type stmtList []stmtKey

// Len implements sort.Interface.
func (l stmtList) Len() int {
	n := len(l)
	return n
}

// Swap implements sort.Interface.
func (l stmtList) Swap(i, j int) {
	first, second := l[i], l[j]
	l[i] = second
	l[j] = first
}

// Less implements sort.Interface. It reports whether the anonymized statement
// of element i sorts before that of element j.
func (l stmtList) Less(i, j int) bool {
	left, right := l[i].anonymizedStmt, l[j].anonymizedStmt
	return left < right
}
// txnList is a slice of transaction keys that implements sort.Interface,
// ordering its entries by key value.
type txnList []txnKey

// Len implements sort.Interface.
func (l txnList) Len() int {
	n := len(l)
	return n
}

// Swap implements sort.Interface.
func (l txnList) Swap(i, j int) {
	first, second := l[i], l[j]
	l[i] = second
	l[j] = first
}

// Less implements sort.Interface. It reports whether key i is smaller than
// key j.
func (l txnList) Less(i, j int) bool {
	left, right := l[i], l[j]
	return left < right
}
// execStatAvg renders the mean of an execution statistic for display in a
// virtual table. The result is the mean of n as a float datum, or NULL when
// count is zero.
func execStatAvg(count int64, n roachpb.NumericStat) tree.Datum {
	if count != 0 {
		mean := tree.DFloat(n.Mean)
		return tree.NewDFloat(mean)
	}
	return tree.DNull
}
// execStatVar renders the variance of an execution statistic for display in a
// virtual table. The result is n.GetVariance(count) as a float datum, or NULL
// when count is zero.
func execStatVar(count int64, n roachpb.NumericStat) tree.Datum {
	if count != 0 {
		variance := tree.DFloat(n.GetVariance(count))
		return tree.NewDFloat(variance)
	}
	return tree.DNull
}
// crdbInternalStmtStatsTable defines the
// crdb_internal.node_statement_statistics virtual table.
//
// Its populate function first verifies that the current user holds the
// VIEWACTIVITY role option, then walks the SQL stats collector reachable from
// the planner and emits one row per (application name, statement key) pair.
// Application names and statement keys are each snapshotted under their
// respective lock and sorted before use so that the output order is
// deterministic.
var crdbInternalStmtStatsTable = virtualSchemaTable{
	comment: `statement statistics (in-memory, not durable; local node only). ` +
		`This table is wiped periodically (by default, at least every two hours)`,
	schema: `
CREATE TABLE crdb_internal.node_statement_statistics (
node_id INT NOT NULL,
application_name STRING NOT NULL,
flags STRING NOT NULL,
key STRING NOT NULL,
anonymized STRING,
count INT NOT NULL,
first_attempt_count INT NOT NULL,
max_retries INT NOT NULL,
last_error STRING,
rows_avg FLOAT NOT NULL,
rows_var FLOAT NOT NULL,
parse_lat_avg FLOAT NOT NULL,
parse_lat_var FLOAT NOT NULL,
plan_lat_avg FLOAT NOT NULL,
plan_lat_var FLOAT NOT NULL,
run_lat_avg FLOAT NOT NULL,
run_lat_var FLOAT NOT NULL,
service_lat_avg FLOAT NOT NULL,
service_lat_var FLOAT NOT NULL,
overhead_lat_avg FLOAT NOT NULL,
overhead_lat_var FLOAT NOT NULL,
bytes_read_avg FLOAT NOT NULL,
bytes_read_var FLOAT NOT NULL,
rows_read_avg FLOAT NOT NULL,
rows_read_var FLOAT NOT NULL,
network_bytes_avg FLOAT,
network_bytes_var FLOAT,
max_mem_usage_avg FLOAT,
max_mem_usage_var FLOAT,
contention_time_avg FLOAT,
contention_time_var FLOAT,
network_msgs_avg FLOAT,
network_msgs_var FLOAT,
max_disk_usage_avg FLOAT,
max_disk_usage_var FLOAT,
implicit_txn BOOL NOT NULL
)`,
	populate: func(ctx context.Context, p *planner, _ *dbdesc.Immutable, addRow func(...tree.Datum) error) error {
		// Reading this table requires the VIEWACTIVITY role option.
		canView, err := p.HasRoleOption(ctx, roleoption.VIEWACTIVITY)
		switch {
		case err != nil:
			return err
		case !canView:
			return pgerror.Newf(pgcode.InsufficientPrivilege,
				"user %s does not have %s privilege", p.User(), roleoption.VIEWACTIVITY)
		}

		sqlStats := p.extendedEvalCtx.sqlStatsCollector.sqlStats
		if sqlStats == nil {
			return errors.AssertionFailedf(
				"cannot access sql statistics from this context")
		}

		nodeID, _ := p.execCfg.NodeID.OptionalNodeID() // zero if not available

		// mean and variance render the columns that are always populated
		// (declared NOT NULL in the schema above).
		mean := func(n roachpb.NumericStat) tree.Datum {
			return tree.NewDFloat(tree.DFloat(n.Mean))
		}
		variance := func(n roachpb.NumericStat, count int64) tree.Datum {
			return tree.NewDFloat(tree.DFloat(n.GetVariance(count)))
		}

		// Snapshot the application names under the stats lock, then sort them
		// so the output is deterministic.
		sqlStats.Lock()
		appNames := make([]string, 0, len(sqlStats.apps))
		for name := range sqlStats.apps {
			appNames = append(appNames, name)
		}
		sqlStats.Unlock()
		sort.Strings(appNames)

		// Emit the rows application by application.
		for _, appName := range appNames {
			appStats := sqlStats.getStatsForApplication(appName)

			// Snapshot the statement keys under the per-application lock, then
			// sort them so the output is deterministic.
			appStats.Lock()
			keys := make(stmtList, 0, len(appStats.stmts))
			for k := range appStats.stmts {
				keys = append(keys, k)
			}
			appStats.Unlock()
			sort.Sort(keys)

			// Emit one row per statement key.
			for _, key := range keys {
				// The anonymized column stays NULL unless scrubbing succeeds.
				anonymized := tree.DNull
				if scrubbed, ok := scrubStmtStatKey(p.getVirtualTabler(), key.anonymizedStmt); ok {
					anonymized = tree.NewDString(scrubbed)
				}

				// NOTE(review): createIfNonexistent is true, so a key that
				// disappeared since the snapshot above would presumably be
				// re-created here — confirm this is intended.
				stmtID := constructStatementIDFromStmtKey(key)
				s := appStats.getStatsForStmtWithKey(key, stmtID, true /* createIfNonexistent */)

				// The per-statement lock is held while the row is built and
				// handed to addRow, and released before the error is checked.
				s.mu.Lock()
				d := &s.mu.data
				count := d.Count
				execCount := d.ExecStats.Count

				lastErr := tree.DNull
				if msg := d.SensitiveInfo.LastErr; msg != "" {
					lastErr = tree.NewDString(msg)
				}

				// flags is "!" for a failed statement and "+" when DistSQL was
				// used; both markers combine as "!+".
				flags := ""
				if key.failed {
					flags = "!"
				}
				if s.mu.distSQLUsed {
					flags += "+"
				}

				addErr := addRow(
					tree.NewDInt(tree.DInt(nodeID)),
					tree.NewDString(appName),
					tree.NewDString(flags),
					tree.NewDString(key.anonymizedStmt),
					anonymized,
					tree.NewDInt(tree.DInt(count)),
					tree.NewDInt(tree.DInt(d.FirstAttemptCount)),
					tree.NewDInt(tree.DInt(d.MaxRetries)),
					lastErr,
					mean(d.NumRows),
					variance(d.NumRows, count),
					mean(d.ParseLat),
					variance(d.ParseLat, count),
					mean(d.PlanLat),
					variance(d.PlanLat, count),
					mean(d.RunLat),
					variance(d.RunLat, count),
					mean(d.ServiceLat),
					variance(d.ServiceLat, count),
					mean(d.OverheadLat),
					variance(d.OverheadLat, count),
					mean(d.BytesRead),
					variance(d.BytesRead, count),
					mean(d.RowsRead),
					variance(d.RowsRead, count),
					// Execution stats columns are NULL when no execution
					// stats were counted (see execStatAvg / execStatVar).
					execStatAvg(execCount, d.ExecStats.NetworkBytes),
					execStatVar(execCount, d.ExecStats.NetworkBytes),
					execStatAvg(execCount, d.ExecStats.MaxMemUsage),
					execStatVar(execCount, d.ExecStats.MaxMemUsage),
					execStatAvg(execCount, d.ExecStats.ContentionTime),
					execStatVar(execCount, d.ExecStats.ContentionTime),
					execStatAvg(execCount, d.ExecStats.NetworkMessages),
					execStatVar(execCount, d.ExecStats.NetworkMessages),
					execStatAvg(execCount, d.ExecStats.MaxDiskUsage),
					execStatVar(execCount, d.ExecStats.MaxDiskUsage),
					tree.MakeDBool(tree.DBool(key.implicitTxn)),
				)
				s.mu.Unlock()
				if addErr != nil {
					return addErr
				}
			}
		}
		return nil
	},
}
// TODO(arul): Explore updating the schema below to have key be an INT and
// statement_ids be INT[] now that we've moved to having uint64 as the type of
// StmtID and TxnKey. Issue #55284
var crdbInternalTransactionStatisticsTable = virtualSchemaTable{
comment: `finer-grained transaction statistics (in-memory, not durable; local node only). ` +
`This table is wiped periodically (by default, at least every two hours)`,
schema: `
CREATE TABLE crdb_internal.node_transaction_statistics (
node_id INT NOT NULL,
application_name STRING NOT NULL,
key STRING,
statement_ids STRING[],
count INT,
max_retries INT,
service_lat_avg FLOAT NOT NULL,