From df0ef24978c51fb0a183d1ac379c1c48b372235d Mon Sep 17 00:00:00 2001 From: Raphael 'kena' Poss Date: Tue, 20 Dec 2022 18:25:54 +0100 Subject: [PATCH] cli,workload: don't hide useful workloads Prior to this patch, at least half of the workloads were hidden from view in the output of `cockroach --help`. There was no good reason for this: most of the workloads are useful for teaching/learning and for experimentation. They all deserve more exposure, so that folk can learn about them without being told by the one person who built the workload in the first place. So this patch fixes that by exposing all of them through the online help. One question that could remain is how much teaching value there is in letting someone experiment with a tool that was built for the benefit of one team only. One specific workload is under consideration here: `bulkingest`, used for benchmarking inside the D&R team, does not really do anything akin to what an end-user would possibly expect to do with a database. For that workload, and for the benefit of future workloads akin to it, this patch adds a notice in its help text that it was developed for internal testing only. 
Release note: None --- pkg/cli/demo.go | 5 +++-- pkg/workload/bank/bank.go | 7 +++---- pkg/workload/bulkingest/bulkingest.go | 17 +++++++++-------- pkg/workload/cli/run.go | 12 ++++++------ pkg/workload/examples/intro.go | 9 ++++----- pkg/workload/examples/startrek.go | 9 ++++----- pkg/workload/geospatial/geospatial.go | 9 ++++----- pkg/workload/insights/insights.go | 7 +++---- pkg/workload/kv/kv.go | 11 +++++------ pkg/workload/movr/movr.go | 7 +++---- pkg/workload/tpcc/tpcc.go | 5 ++--- pkg/workload/tpch/tpch.go | 16 ++++++++-------- pkg/workload/ttlbench/ttlbench.go | 24 +++++++++++++----------- pkg/workload/ttllogger/ttllogger.go | 17 ++++++++--------- pkg/workload/workload.go | 12 ++++++------ pkg/workload/ycsb/ycsb.go | 15 +++++++-------- 16 files changed, 88 insertions(+), 94 deletions(-) diff --git a/pkg/cli/demo.go b/pkg/cli/demo.go index 57d19a04852e..618e70191c35 100644 --- a/pkg/cli/demo.go +++ b/pkg/cli/demo.go @@ -82,13 +82,14 @@ func init() { genDemoCmd := &cobra.Command{ Use: meta.Name, Short: meta.Description, + Long: meta.Description + meta.Details, Args: cobra.ArbitraryArgs, RunE: clierrorplus.MaybeDecorateError(func(cmd *cobra.Command, _ []string) error { return runDemo(cmd, gen) }), } - if !meta.PublicFacing { - genDemoCmd.Hidden = true + if meta.TestInfraOnly { + demoCmd.Long = "THIS COMMAND WAS DEVELOPED FOR INTERNAL TESTING ONLY.\n\n" + demoCmd.Long } demoCmd.AddCommand(genDemoCmd) genDemoCmd.Flags().AddFlagSet(genFlags) diff --git a/pkg/workload/bank/bank.go b/pkg/workload/bank/bank.go index 580c24e5c265..2d03010a21df 100644 --- a/pkg/workload/bank/bank.go +++ b/pkg/workload/bank/bank.go @@ -56,10 +56,9 @@ func init() { } var bankMeta = workload.Meta{ - Name: `bank`, - Description: `Bank models a set of accounts with currency balances`, - Version: `1.0.0`, - PublicFacing: true, + Name: `bank`, + Description: `Bank models a set of accounts with currency balances.`, + Version: `1.0.0`, New: func() workload.Generator { g := &bank{} 
g.flags.FlagSet = pflag.NewFlagSet(`bank`, pflag.ContinueOnError) diff --git a/pkg/workload/bulkingest/bulkingest.go b/pkg/workload/bulkingest/bulkingest.go index 08095d801b2d..7e4770dd5c92 100644 --- a/pkg/workload/bulkingest/bulkingest.go +++ b/pkg/workload/bulkingest/bulkingest.go @@ -91,18 +91,19 @@ func init() { } var bulkingestMeta = workload.Meta{ - Name: `bulkingest`, - Description: `bulkingest testdata is designed to produce a skewed distribution of KVs when ingested (in initial import or during later indexing)`, - Version: `1.0.0`, + Name: `bulkingest`, + Description: `This workload is designed to produce a skewed distribution of KVs when ingested (in initial import or during later indexing).`, + Version: `1.0.0`, + TestInfraOnly: true, New: func() workload.Generator { g := &bulkingest{} g.flags.FlagSet = pflag.NewFlagSet(`bulkingest`, pflag.ContinueOnError) g.flags.Int64Var(&g.seed, `seed`, 1, `Key hash seed.`) - g.flags.IntVar(&g.aCount, `a`, 10, `number of values of A (i.e. pk prefix)`) - g.flags.IntVar(&g.bCount, `b`, 10, `number of values of B (i.e. idx prefix)`) - g.flags.IntVar(&g.cCount, `c`, 1000, `number of values of C (i.e. rows per A/B pair)`) - g.flags.BoolVar(&g.generateBsFirst, `batches-by-b`, false, `generate all B batches for given A first`) - g.flags.BoolVar(&g.indexBCA, `index-b-c-a`, true, `include an index on (B, C, A)`) + g.flags.IntVar(&g.aCount, `a`, 10, `Number of values of A (i.e. pk prefix).`) + g.flags.IntVar(&g.bCount, `b`, 10, `Number of values of B (i.e. idx prefix).`) + g.flags.IntVar(&g.cCount, `c`, 1000, `Number of values of C (i.e. 
rows per A/B pair).`) + g.flags.BoolVar(&g.generateBsFirst, `batches-by-b`, false, `Generate all B batches for given A first.`) + g.flags.BoolVar(&g.indexBCA, `index-b-c-a`, true, `Include an index on (B, C, A).`) g.flags.IntVar(&g.payloadBytes, `payload-bytes`, defaultPayloadBytes, `Size of the payload field in each row.`) g.connFlags = workload.NewConnFlags(&g.flags) return g diff --git a/pkg/workload/cli/run.go b/pkg/workload/cli/run.go index f741cb5dc1c7..7dcba528193a 100644 --- a/pkg/workload/cli/run.go +++ b/pkg/workload/cli/run.go @@ -115,8 +115,8 @@ func init() { genInitCmd.Flags().AddFlagSet(genFlags) genInitCmd.Flags().AddFlagSet(securityFlags) genInitCmd.Run = CmdHelper(gen, runInit) - if userFacing && !meta.PublicFacing { - genInitCmd.Hidden = true + if meta.TestInfraOnly { + genInitCmd.Long = "THIS COMMAND WAS DEVELOPED FOR INTERNAL TESTING ONLY.\n\n" + genInitCmd.Long } initCmd.AddCommand(genInitCmd) } @@ -156,10 +156,10 @@ func init() { f.Usage += ` (implies --init)` genRunCmd.Flags().AddFlag(&f) }) - genRunCmd.Run = CmdHelper(gen, runRun) - if userFacing && !meta.PublicFacing { - genRunCmd.Hidden = true + if meta.TestInfraOnly { + genRunCmd.Long = "THIS COMMAND WAS DEVELOPED FOR INTERNAL TESTING ONLY.\n\n" + genRunCmd.Long } + genRunCmd.Run = CmdHelper(gen, runRun) runCmd.AddCommand(genRunCmd) } return runCmd @@ -326,7 +326,7 @@ func runInitImpl( // For example, at the time of writing, neither roachmart and ledger are // public-facing, but both support fixtures. However, returning true here // would result in "pq: unknown generator: roachmart" from the cluster. 
- if workload.SupportsFixtures(gen) && gen.Meta().PublicFacing { + if workload.SupportsFixtures(gen) { lc = "import" } } diff --git a/pkg/workload/examples/intro.go b/pkg/workload/examples/intro.go index 67b5aa11924a..07f7e0161c07 100644 --- a/pkg/workload/examples/intro.go +++ b/pkg/workload/examples/intro.go @@ -23,11 +23,10 @@ func init() { } var introMeta = workload.Meta{ - Name: `intro`, - Description: `Intro contains a single table with a hidden message`, - Version: `1.0.0`, - PublicFacing: true, - New: func() workload.Generator { return intro{} }, + Name: `intro`, + Description: `Intro contains a single table with a hidden message.`, + Version: `1.0.0`, + New: func() workload.Generator { return intro{} }, } // Meta implements the Generator interface. diff --git a/pkg/workload/examples/startrek.go b/pkg/workload/examples/startrek.go index 915e2e9b39da..87f72f9b8b0a 100644 --- a/pkg/workload/examples/startrek.go +++ b/pkg/workload/examples/startrek.go @@ -30,11 +30,10 @@ func init() { } var startrekMeta = workload.Meta{ - Name: `startrek`, - Description: `Star Trek models episodes and quotes from the tv show`, - Version: `1.0.0`, - PublicFacing: true, - New: func() workload.Generator { return startrek{} }, + Name: `startrek`, + Description: `Star Trek models episodes and quotes from the TV show.`, + Version: `1.0.0`, + New: func() workload.Generator { return startrek{} }, } // Meta implements the Generator interface. 
diff --git a/pkg/workload/geospatial/geospatial.go b/pkg/workload/geospatial/geospatial.go index 0d61a933c124..ea3d174d031d 100644 --- a/pkg/workload/geospatial/geospatial.go +++ b/pkg/workload/geospatial/geospatial.go @@ -32,11 +32,10 @@ func init() { } var geospatialMeta = workload.Meta{ - Name: `geospatial`, - Description: `geospatial contains PostGIS tutorial tables`, - Version: `1.0.0`, - PublicFacing: true, - New: func() workload.Generator { return geospatial{} }, + Name: `geospatial`, + Description: `geospatial contains PostGIS tutorial tables.`, + Version: `1.0.0`, + New: func() workload.Generator { return geospatial{} }, } // Meta implements the Generator interface. diff --git a/pkg/workload/insights/insights.go b/pkg/workload/insights/insights.go index e64c70f05f40..cafc733dcfc5 100644 --- a/pkg/workload/insights/insights.go +++ b/pkg/workload/insights/insights.go @@ -63,10 +63,9 @@ func init() { } var insightsMeta = workload.Meta{ - Name: `insights`, - Description: `This workload executes queries that will be detected by insights`, - Version: `1.0.0`, - PublicFacing: false, + Name: `insights`, + Description: `This workload executes queries that will be detected by the database insights in the web UI.`, + Version: `1.0.0`, New: func() workload.Generator { g := &insights{} g.flags.FlagSet = pflag.NewFlagSet(`insights`, pflag.ContinueOnError) diff --git a/pkg/workload/kv/kv.go b/pkg/workload/kv/kv.go index ac990d89de83..d40bb4c01719 100644 --- a/pkg/workload/kv/kv.go +++ b/pkg/workload/kv/kv.go @@ -106,8 +106,7 @@ var kvMeta = workload.Meta{ --write-seq can be used to incorporate data produced by a previous run into the current run. 
`, - Version: `1.0.0`, - PublicFacing: true, + Version: `1.0.0`, New: func() workload.Generator { g := &kv{} g.flags.FlagSet = pflag.NewFlagSet(`kv`, pflag.ContinueOnError) @@ -144,17 +143,17 @@ var kvMeta = workload.Meta{ g.flags.IntVar(&g.splits, `splits`, 0, `Number of splits to perform before starting normal operations.`) g.flags.BoolVar(&g.secondaryIndex, `secondary-index`, false, - `Add a secondary index to the schema`) + `Add a secondary index to the schema.`) g.flags.IntVar(&g.shards, `num-shards`, 0, `Number of shards to create on the primary key.`) g.flags.Float64Var(&g.targetCompressionRatio, `target-compression-ratio`, 1.0, - `Target compression ratio for data blocks. Must be >= 1.0`) + `Target compression ratio for data blocks. Must be >= 1.0.`) g.flags.BoolVar(&g.enum, `enum`, false, - `Inject an enum column and use it`) + `Inject an enum column and use it.`) g.flags.IntVar(&g.insertCount, `insert-count`, 0, `Number of rows to insert before beginning the workload. Keys are inserted `+ `uniformly over the key range.`) - g.flags.DurationVar(&g.timeout, `timeout`, 0, `Client-side statement timeout`) + g.flags.DurationVar(&g.timeout, `timeout`, 0, `Client-side statement timeout.`) g.connFlags = workload.NewConnFlags(&g.flags) return g }, diff --git a/pkg/workload/movr/movr.go b/pkg/workload/movr/movr.go index 36ade4bc6779..c6785ea0cda7 100644 --- a/pkg/workload/movr/movr.go +++ b/pkg/workload/movr/movr.go @@ -200,10 +200,9 @@ func init() { } var movrMeta = workload.Meta{ - Name: `movr`, - Description: `MovR is a fictional vehicle sharing company`, - Version: `1.0.0`, - PublicFacing: true, + Name: `movr`, + Description: `MovR is a fictional vehicle sharing company`, + Version: `1.0.0`, New: func() workload.Generator { g := &movr{} g.flags.FlagSet = pflag.NewFlagSet(`movr`, pflag.ContinueOnError) diff --git a/pkg/workload/tpcc/tpcc.go b/pkg/workload/tpcc/tpcc.go index 918ea2c6f846..86e382e2e42d 100644 --- a/pkg/workload/tpcc/tpcc.go +++ 
b/pkg/workload/tpcc/tpcc.go @@ -154,9 +154,8 @@ func FromWarehouses(warehouses int) workload.Generator { var tpccMeta = workload.Meta{ Name: `tpcc`, Description: `TPC-C simulates a transaction processing workload` + - ` using a rich schema of multiple tables`, - Version: `2.2.0`, - PublicFacing: true, + ` using a rich schema of multiple tables.`, + Version: `2.2.0`, New: func() workload.Generator { g := &tpcc{} g.flags.FlagSet = pflag.NewFlagSet(`tpcc`, pflag.ContinueOnError) diff --git a/pkg/workload/tpch/tpch.go b/pkg/workload/tpch/tpch.go index 71773f70de08..0244c6ed6b9c 100644 --- a/pkg/workload/tpch/tpch.go +++ b/pkg/workload/tpch/tpch.go @@ -96,23 +96,23 @@ var tpchMeta = workload.Meta{ `enable-checks`: {RuntimeOnly: true}, `vectorize`: {RuntimeOnly: true}, } - g.flags.Uint64Var(&g.seed, `seed`, 1, `Random number generator seed`) + g.flags.Uint64Var(&g.seed, `seed`, 1, `Random number generator seed.`) g.flags.IntVar(&g.scaleFactor, `scale-factor`, 1, - `Linear scale of how much data to use (each SF is ~1GB)`) - g.flags.BoolVar(&g.fks, `fks`, true, `Add the foreign keys`) + `Linear scale of how much data to use (each SF is ~1GB).`) + g.flags.BoolVar(&g.fks, `fks`, true, `Add foreign keys relationships.`) g.flags.StringVar(&g.queriesRaw, `queries`, `1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22`, - `Queries to run. Use a comma separated list of query numbers`) + `Queries to run. Use a comma separated list of query numbers.`) g.flags.BoolVar(&g.enableChecks, `enable-checks`, false, "Enable checking the output against the expected rows (default false). 
"+ "Note that the checks are only supported for scale factor 1 of the backup "+ - "stored at 'gs://cockroach-fixtures/workload/tpch/scalefactor=1/backup'") + "stored at 'gs://cockroach-fixtures/workload/tpch/scalefactor=1/backup'.") g.flags.StringVar(&g.vectorize, `vectorize`, `on`, - `Set vectorize session variable`) + `Set vectorize session variable.`) g.flags.BoolVar(&g.useClusterVectorizeSetting, `default-vectorize`, false, - `Ignore vectorize option and use the current cluster setting sql.defaults.vectorize`) + `Ignore vectorize option and use the current cluster setting sql.defaults.vectorize.`) g.flags.BoolVar(&g.verbose, `verbose`, false, - `Prints out the queries being run as well as histograms`) + `Prints out the queries being run as well as histograms.`) g.connFlags = workload.NewConnFlags(&g.flags) return g }, diff --git a/pkg/workload/ttlbench/ttlbench.go b/pkg/workload/ttlbench/ttlbench.go index cb5987b0bcfb..cb40cb581069 100644 --- a/pkg/workload/ttlbench/ttlbench.go +++ b/pkg/workload/ttlbench/ttlbench.go @@ -52,8 +52,11 @@ type ttlBench struct { } var ttlBenchMeta = workload.Meta{ - Name: "ttlbench", - Description: `Measures how long it takes for the row-level TTL job to run on a table: + Name: "ttlbench", + Description: `Measures how long it takes for the row-level TTL job to run on a table.`, + Details: ` + +The workload works as follows: 1) Drop TTL table IF EXISTS. 2) Create a table without TTL. 3) Insert initialRowCount number of rows. @@ -63,19 +66,18 @@ var ttlBenchMeta = workload.Meta{ 7) Poll table until TTL job is complete. Note: Ops is a no-op and no histograms are used. Benchmarking is done inside Hooks and details are logged. 
`, - Version: "0.0.1", - PublicFacing: false, + Version: "0.0.1", New: func() workload.Generator { g := &ttlBench{} flags := &g.flags flags.FlagSet = pflag.NewFlagSet(`ttlbench`, pflag.ContinueOnError) - flags.Int64Var(&g.seed, `seed`, 1, `seed for randomization operations`) - flags.IntVar(&g.initialRowCount, `initial-row-count`, 0, `initial rows in table`) - flags.IntVar(&g.rowMessageLength, `row-message-length`, 128, `length of row message`) - flags.IntVar(&g.expiredRowPercentage, `expired-row-percentage`, 50, `percentage of rows that are expired`) - flags.IntVar(&g.ttlBatchSize, `ttl-batch-size`, 500, `size of TTL SELECT and DELETE batches`) - flags.IntVar(&g.rangeMinBytes, `range-min-bytes`, 134217728, `minimum number of bytes in range before merging`) - flags.IntVar(&g.rangeMaxBytes, `range-max-bytes`, 536870912, `maximum number of bytes in range before splitting`) + flags.Int64Var(&g.seed, `seed`, 1, `Seed for randomization operations.`) + flags.IntVar(&g.initialRowCount, `initial-row-count`, 0, `Initial rows in table.`) + flags.IntVar(&g.rowMessageLength, `row-message-length`, 128, `Length of row message.`) + flags.IntVar(&g.expiredRowPercentage, `expired-row-percentage`, 50, `Percentage of rows that are expired.`) + flags.IntVar(&g.ttlBatchSize, `ttl-batch-size`, 500, `Size of TTL SELECT and DELETE batches.`) + flags.IntVar(&g.rangeMinBytes, `range-min-bytes`, 134217728, `Minimum number of bytes in range before merging.`) + flags.IntVar(&g.rangeMaxBytes, `range-max-bytes`, 536870912, `Maximum number of bytes in range before splitting.`) g.connFlags = workload.NewConnFlags(flags) return g }, diff --git a/pkg/workload/ttllogger/ttllogger.go b/pkg/workload/ttllogger/ttllogger.go index 3040f711b69e..5155abd32cb0 100644 --- a/pkg/workload/ttllogger/ttllogger.go +++ b/pkg/workload/ttllogger/ttllogger.go @@ -45,18 +45,17 @@ type ttlLogger struct { } var ttlLoggerMeta = workload.Meta{ - Name: "ttllogger", - Description: "Generates a simple log table with rows 
expiring after the given TTL.", - Version: "0.0.1", - PublicFacing: true, + Name: "ttllogger", + Description: "Generates a simple log table with rows expiring after the given TTL.", + Version: "0.0.1", New: func() workload.Generator { g := &ttlLogger{} g.flags.FlagSet = pflag.NewFlagSet(`ttllogger`, pflag.ContinueOnError) - g.flags.DurationVar(&g.ttl, "ttl", time.Minute, `duration for the TTL to expire`) - g.flags.Int64Var(&g.seed, `seed`, 1, `seed for randomization operations`) - g.flags.IntVar(&g.minRowsPerInsert, `min-rows-per-insert`, 1, `minimum rows per insert per query`) - g.flags.IntVar(&g.maxRowsPerInsert, `max-rows-per-insert`, 100, `maximum rows per insert per query`) - g.flags.BoolVar(&g.tsAsPrimaryKey, `ts-as-primary-key`, true, `whether timestamp column for the table should be part of the primary key`) + g.flags.DurationVar(&g.ttl, "ttl", time.Minute, `Duration for the TTL to expire.`) + g.flags.Int64Var(&g.seed, `seed`, 1, `Seed for randomization operations.`) + g.flags.IntVar(&g.minRowsPerInsert, `min-rows-per-insert`, 1, `Minimum rows per insert per query.`) + g.flags.IntVar(&g.maxRowsPerInsert, `max-rows-per-insert`, 100, `Maximum rows per insert per query.`) + g.flags.BoolVar(&g.tsAsPrimaryKey, `ts-as-primary-key`, true, `Whether timestamp column for the table should be part of the primary key.`) g.connFlags = workload.NewConnFlags(&g.flags) return g }, diff --git a/pkg/workload/workload.go b/pkg/workload/workload.go index d980ec00386b..2223942cd61d 100644 --- a/pkg/workload/workload.go +++ b/pkg/workload/workload.go @@ -149,12 +149,12 @@ type Meta struct { // Version is a semantic version for this generator. It should be bumped // whenever InitialRowFn or InitialRowCount change for any of the tables. Version string - // PublicFacing indicates that this workload is also intended for use by - // users doing their own testing and evaluations. 
This allows hiding workloads - // that are only expected to be used in CockroachDB's internal development to - // avoid confusion. Workloads setting this to true should pay added attention - // to their documentation and help-text. - PublicFacing bool + + // TestInfraOnly indicates that a workload was primarily designed for + // internal testing by one team Cockroach Labs, and is expected to + // be of limited teaching value to other teams or end-users. + TestInfraOnly bool + // New returns an unconfigured instance of this generator. New func() Generator } diff --git a/pkg/workload/ycsb/ycsb.go b/pkg/workload/ycsb/ycsb.go index f70d609e2250..d3fb3700fd00 100644 --- a/pkg/workload/ycsb/ycsb.go +++ b/pkg/workload/ycsb/ycsb.go @@ -115,10 +115,9 @@ func init() { } var ycsbMeta = workload.Meta{ - Name: `ycsb`, - Description: `YCSB is the Yahoo! Cloud Serving Benchmark`, - Version: `1.0.0`, - PublicFacing: true, + Name: `ycsb`, + Description: `YCSB is the Yahoo! Cloud Serving Benchmark.`, + Version: `1.0.0`, New: func() workload.Generator { g := &ycsb{} g.flags.FlagSet = pflag.NewFlagSet(`ycsb`, pflag.ContinueOnError) @@ -132,10 +131,10 @@ var ycsbMeta = workload.Meta{ g.flags.IntVar(&g.insertStart, `insert-start`, 0, `Key to start initial sequential insertions from. (default 0)`) g.flags.IntVar(&g.insertCount, `insert-count`, 10000, `Number of rows to sequentially insert before beginning workload.`) g.flags.IntVar(&g.recordCount, `record-count`, 0, `Key to start workload insertions from. Must be >= insert-start + insert-count. 
(Default: insert-start + insert-count)`) - g.flags.BoolVar(&g.json, `json`, false, `Use JSONB rather than relational data`) - g.flags.BoolVar(&g.families, `families`, true, `Place each column in its own column family`) - g.flags.BoolVar(&g.sfu, `select-for-update`, true, `Use SELECT FOR UPDATE syntax in read-modify-write transactions`) - g.flags.IntVar(&g.splits, `splits`, 0, `Number of splits to perform before starting normal operations`) + g.flags.BoolVar(&g.json, `json`, false, `Use JSONB rather than relational data.`) + g.flags.BoolVar(&g.families, `families`, true, `Place each column in its own column family.`) + g.flags.BoolVar(&g.sfu, `select-for-update`, true, `Use SELECT FOR UPDATE syntax in read-modify-write transactions.`) + g.flags.IntVar(&g.splits, `splits`, 0, `Number of splits to perform before starting normal operations.`) g.flags.StringVar(&g.workload, `workload`, `B`, `Workload type. Choose from A-F.`) g.flags.StringVar(&g.requestDistribution, `request-distribution`, ``, `Distribution for request key generation [zipfian, uniform, latest]. The default for workloads A, B, C, E, and F is zipfian, and the default for workload D is latest.`) g.flags.StringVar(&g.scanLengthDistribution, `scan-length-distribution`, `uniform`, `Distribution for scan length generation [zipfian, uniform]. Primarily used for workload E.`)