Skip to content

Commit

Permalink
Merge pull request #394 from scylladb/dk/enable-random-seeds
Browse files Browse the repository at this point in the history
feat(gemini): make schema seed and workload seed random
  • Loading branch information
dkropachev authored Jul 13, 2023
2 parents 0b6a06b + a2d5a4e commit bb5f54c
Show file tree
Hide file tree
Showing 22 changed files with 116 additions and 80 deletions.
16 changes: 11 additions & 5 deletions cmd/gemini/generators.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,29 @@ import (
func createGenerators(
schema *typedef.Schema,
schemaConfig typedef.SchemaConfig,
distributionFunc generators.DistributionFunc,
_, distributionSize uint64,
logger *zap.Logger,
) generators.Generators {
) (generators.Generators, error) {
partitionRangeConfig := schemaConfig.GetPartitionRangeConfig()

var gs []*generators.Generator
for _, table := range schema.Tables {
for id := range schema.Tables {
table := schema.Tables[id]

distFunc, err := createDistributionFunc(partitionKeyDistribution, partitionCount, seed, stdDistMean, oneStdDev)
if err != nil {
return nil, err
}

gCfg := &generators.Config{
PartitionsRangeConfig: partitionRangeConfig,
PartitionsCount: distributionSize,
PartitionsDistributionFunc: distributionFunc,
PartitionsDistributionFunc: distFunc,
Seed: seed,
PkUsedBufferSize: pkBufferReuseSize,
}
g := generators.NewGenerator(table, gCfg, logger.Named("generators"))
gs = append(gs, g)
}
return gs
return gs, nil
}
27 changes: 19 additions & 8 deletions cmd/gemini/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package main

import (
"encoding/binary"
"encoding/json"
"fmt"
"log"
Expand All @@ -39,6 +40,8 @@ import (
"github.com/scylladb/gemini/pkg/status"
"github.com/scylladb/gemini/pkg/stop"

crand "crypto/rand"

"github.com/gocql/gocql"
"github.com/hailocab/go-hostpool"
"github.com/pkg/errors"
Expand Down Expand Up @@ -178,10 +181,6 @@ func run(_ *cobra.Command, _ []string) error {
if err = printSetup(); err != nil {
return errors.Wrapf(err, "unable to print setup")
}
distFunc, err := createDistributionFunc(partitionKeyDistribution, math.MaxUint64, seed, stdDistMean, oneStdDev)
if err != nil {
return err
}

outFile, err := createFile(outFileArg, os.Stdout)
if err != nil {
Expand Down Expand Up @@ -259,9 +258,12 @@ func run(_ *cobra.Command, _ []string) error {
stopFlag := stop.NewFlag("main")
warmupStopFlag := stop.NewFlag("warmup")
stop.StartOsSignalsTransmitter(logger, stopFlag, warmupStopFlag)
pump := jobs.NewPump(ctx, logger)
pump := jobs.NewPump(stopFlag, logger)

gens := createGenerators(schema, schemaConfig, distFunc, concurrency, partitionCount, logger)
gens, err := createGenerators(schema, schemaConfig, concurrency, partitionCount, logger)
if err != nil {
return err
}
gens.StartAll(stopFlag)

if !nonInteractive {
Expand Down Expand Up @@ -457,8 +459,8 @@ func init() {
rootCmd.Flags().StringVarP(&schemaFile, "schema", "", "", "Schema JSON config file")
rootCmd.Flags().StringVarP(&mode, "mode", "m", jobs.MixedMode, "Query operation mode. Mode options: write, read, mixed (default)")
rootCmd.Flags().Uint64VarP(&concurrency, "concurrency", "c", 10, "Number of threads per table to run concurrently")
rootCmd.Flags().Uint64VarP(&seed, "seed", "s", 1, "Statement seed value")
rootCmd.Flags().Uint64VarP(&schemaSeed, "schema-seed", "", 1, "Schema seed value")
rootCmd.Flags().Uint64VarP(&seed, "seed", "s", RealRandom(), "Statement seed value")
rootCmd.Flags().Uint64VarP(&schemaSeed, "schema-seed", "", RealRandom(), "Schema seed value")
rootCmd.Flags().BoolVarP(&dropSchema, "drop-schema", "d", false, "Drop schema before starting tests run")
rootCmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "Verbose output during test run")
rootCmd.Flags().BoolVarP(&failFast, "fail-fast", "f", false, "Stop on the first failure")
Expand Down Expand Up @@ -540,3 +542,12 @@ func printSetup() error {
tw.Flush()
return nil
}

// RealRandom returns a 64-bit value suitable for use as a default seed.
//
// The value is read from the operating system's cryptographically secure
// random source. If that source reports an error, the current wall-clock time
// in nanoseconds since the Unix epoch is used instead, so the result still
// varies from run to run.
func RealRandom() uint64 {
	var b [8]byte
	if _, err := crand.Read(b[:]); err != nil {
		// Fall back to a single clock reading. Multiplying the nanosecond and
		// second components (as was done previously) yields 0 during the first
		// second of every minute and covers only a small part of the uint64
		// range; UnixNano avoids both problems.
		return uint64(time.Now().UnixNano())
	}
	return binary.LittleEndian.Uint64(b[:])
}
9 changes: 5 additions & 4 deletions pkg/generators/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ type Generator struct {
partitions Partitions
partitionsConfig typedef.PartitionRangeConfig
partitionCount uint64
seed uint64

cntCreated uint64
cntEmitted uint64
Expand Down Expand Up @@ -87,7 +86,6 @@ func NewGenerator(table *typedef.Table, config *Config, logger *zap.Logger) *Gen
partitionCount: config.PartitionsCount,
table: table,
partitionsConfig: config.PartitionsRangeConfig,
seed: config.Seed,
idxFunc: config.PartitionsDistributionFunc,
logger: logger,
wakeUpSignal: wakeUpSignal,
Expand Down Expand Up @@ -121,7 +119,7 @@ func (g *Generator) Start(stopFlag *stop.Flag) {
g.logger.Info("starting partition key generation loop")
defer g.partitions.CloseAll()
for {
g.fillAllPartitions()
g.fillAllPartitions(stopFlag)
select {
case <-stopFlag.SignalChannel():
g.logger.Debug("stopping partition key generation loop",
Expand All @@ -137,7 +135,7 @@ func (g *Generator) Start(stopFlag *stop.Flag) {
// fillAllPartitions guarantees that each partition was tested to be full
// at least once since the function started and before it ended.
// In other words no partition will be starved.
func (g *Generator) fillAllPartitions() {
func (g *Generator) fillAllPartitions(stopFlag *stop.Flag) {
pFilled := make([]bool, len(g.partitions))
allFilled := func() bool {
for _, filled := range pFilled {
Expand All @@ -160,6 +158,9 @@ func (g *Generator) fillAllPartitions() {
case partition.values <- &typedef.ValueWithToken{Token: token, Value: values}:
g.cntEmitted++
default:
if stopFlag.IsHardOrSoft() {
return
}
if !pFilled[idx] {
pFilled[idx] = true
if allFilled() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/generators/partition.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ type Partition struct {
func (s *Partition) get() *typedef.ValueWithToken {
for {
v := s.pick()
if s.inFlight.AddIfNotPresent(v.Token) {
if v == nil || s.inFlight.AddIfNotPresent(v.Token) {
return v
}
}
Expand Down
13 changes: 5 additions & 8 deletions pkg/jobs/pump.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@
package jobs

import (
"context"
"time"

"github.com/scylladb/gemini/pkg/stop"

"go.uber.org/zap"
"golang.org/x/exp/rand"
)

func NewPump(ctx context.Context, logger *zap.Logger) chan time.Duration {
func NewPump(stopFlag *stop.Flag, logger *zap.Logger) chan time.Duration {
pump := make(chan time.Duration, 10000)
logger = logger.Named("Pump")
go func() {
Expand All @@ -31,12 +32,8 @@ func NewPump(ctx context.Context, logger *zap.Logger) chan time.Duration {
close(pump)
logger.Debug("pump channel closed")
}()
for {
select {
case <-ctx.Done():
break
case pump <- newHeartBeat():
}
for !stopFlag.IsHardOrSoft() {
pump <- newHeartBeat()
}
}()

Expand Down
4 changes: 2 additions & 2 deletions pkg/jobs/test_expected_data/check/clustering_range.json
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
"TokenValues": "",
"Query": "SELECT * FROM ks1.pkAll_ckAll_colAll.cck1 WHERE pk0=? AND pk1=? AND pk2=? AND pk3=? AND pk4=? AND pk5=? AND pk6=? AND pk7=? AND pk8=? AND pk9=? AND pk10=? AND pk11=? AND pk12=? AND pk13=? AND pk14=? AND pk15=? AND pk16=? AND pk17=? AND pk18=? AND ck0\u003e? AND ck0\u003c?",
"Names": "[pk0 pk1 pk2 pk3 pk4 pk5 pk6 pk7 pk8 pk9 pk10 pk11 pk12 pk13 pk14 pk15 pk16 pk17 pk18 ck0 ck0]",
"Values": "[01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 0 00 1 00000001-0000-1000-8000-3132372e302e 0 00000001-0000-1000-8000-3132372e302e 00 1 1 01 00]",
"Values": "[01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 1 00 1 00000001-0000-1000-8000-3132372e302e 1 00000001-0000-1000-8000-3132372e302e 00 1 1 01 00]",
"Types": " ascii bigint blob boolean date decimal double float inet int smallint text timestamp timeuuid tinyint uuid varchar varint time ascii ascii",
"QueryType": "1"
}
Expand All @@ -71,7 +71,7 @@
"TokenValues": "",
"Query": "SELECT * FROM ks1.pkAll_ckAll_colAll.cckAll WHERE pk0=? AND pk1=? AND pk2=? AND pk3=? AND pk4=? AND pk5=? AND pk6=? AND pk7=? AND pk8=? AND pk9=? AND pk10=? AND pk11=? AND pk12=? AND pk13=? AND pk14=? AND pk15=? AND pk16=? AND pk17=? AND pk18=? AND ck0=? AND ck1=? AND ck2=? AND ck3=? AND ck4=? AND ck5=? AND ck6=? AND ck7=? AND ck8=? AND ck9=? AND ck10=? AND ck11=? AND ck12=? AND ck13=? AND ck14=? AND ck15=? AND ck16=? AND ck17=? AND ck18\u003e? AND ck18\u003c?",
"Names": "[pk0 pk1 pk2 pk3 pk4 pk5 pk6 pk7 pk8 pk9 pk10 pk11 pk12 pk13 pk14 pk15 pk16 pk17 pk18 ck0 ck1 ck2 ck3 ck4 ck5 ck6 ck7 ck8 ck9 ck10 ck11 ck12 ck13 ck14 ck15 ck16 ck17 ck18 ck18]",
"Values": "[01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 0 00 1 00000001-0000-1000-8000-3132372e302e 0 00000001-0000-1000-8000-3132372e302e 00 1 1 01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 0 00 1 00000001-0000-1000-8000-3132372e302e 0 00000001-0000-1000-8000-3132372e302e 00 1 1 1]",
"Values": "[01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 1 00 1 00000001-0000-1000-8000-3132372e302e 1 00000001-0000-1000-8000-3132372e302e 00 1 1 01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 1 00 1 00000001-0000-1000-8000-3132372e302e 1 00000001-0000-1000-8000-3132372e302e 00 1 1 1]",
"Types": " ascii bigint blob boolean date decimal double float inet int smallint text timestamp timeuuid tinyint uuid varchar varint time ascii bigint blob boolean date decimal double float inet int smallint text timestamp timeuuid tinyint uuid varchar varint time time",
"QueryType": "1"
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/jobs/test_expected_data/check/clustering_range_mv.json
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@
"TokenValues": "",
"Query": "SELECT * FROM ks1.pkAll_ckAll_colAll_mv.cck1_mv_1 WHERE pk0=? AND pk1=? AND pk2=? AND pk3=? AND pk4=? AND pk5=? AND pk6=? AND pk7=? AND pk8=? AND pk9=? AND pk10=? AND pk11=? AND pk12=? AND pk13=? AND pk14=? AND pk15=? AND pk16=? AND pk17=? AND pk18=? AND ck0\u003e? AND ck0\u003c?",
"Names": "[pk0 pk1 pk2 pk3 pk4 pk5 pk6 pk7 pk8 pk9 pk10 pk11 pk12 pk13 pk14 pk15 pk16 pk17 pk18 ck0 ck0]",
"Values": "[01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 0 00 1 00000001-0000-1000-8000-3132372e302e 0 00000001-0000-1000-8000-3132372e302e 00 1 1 01 00]",
"Values": "[01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 1 00 1 00000001-0000-1000-8000-3132372e302e 1 00000001-0000-1000-8000-3132372e302e 00 1 1 01 00]",
"Types": " ascii bigint blob boolean date decimal double float inet int smallint text timestamp timeuuid tinyint uuid varchar varint time ascii ascii",
"QueryType": "1"
}
Expand All @@ -104,7 +104,7 @@
"TokenValues": "",
"Query": "SELECT * FROM ks1.pkAll_ckAll_colAll_mv.cckAll_mv_1 WHERE pk0=? AND pk1=? AND pk2=? AND pk3=? AND pk4=? AND pk5=? AND pk6=? AND pk7=? AND pk8=? AND pk9=? AND pk10=? AND pk11=? AND pk12=? AND pk13=? AND pk14=? AND pk15=? AND pk16=? AND pk17=? AND pk18=? AND ck0=? AND ck1=? AND ck2=? AND ck3=? AND ck4=? AND ck5=? AND ck6=? AND ck7=? AND ck8=? AND ck9=? AND ck10=? AND ck11=? AND ck12=? AND ck13=? AND ck14=? AND ck15=? AND ck16=? AND ck17=? AND ck18\u003e? AND ck18\u003c?",
"Names": "[pk0 pk1 pk2 pk3 pk4 pk5 pk6 pk7 pk8 pk9 pk10 pk11 pk12 pk13 pk14 pk15 pk16 pk17 pk18 ck0 ck1 ck2 ck3 ck4 ck5 ck6 ck7 ck8 ck9 ck10 ck11 ck12 ck13 ck14 ck15 ck16 ck17 ck18 ck18]",
"Values": "[01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 0 00 1 00000001-0000-1000-8000-3132372e302e 0 00000001-0000-1000-8000-3132372e302e 00 1 1 01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 0 00 1 00000001-0000-1000-8000-3132372e302e 0 00000001-0000-1000-8000-3132372e302e 00 1 1 1]",
"Values": "[01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 1 00 1 00000001-0000-1000-8000-3132372e302e 1 00000001-0000-1000-8000-3132372e302e 00 1 1 01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 1 00 1 00000001-0000-1000-8000-3132372e302e 1 00000001-0000-1000-8000-3132372e302e 00 1 1 1]",
"Types": " ascii bigint blob boolean date decimal double float inet int smallint text timestamp timeuuid tinyint uuid varchar varint time ascii bigint blob boolean date decimal double float inet int smallint text timestamp timeuuid tinyint uuid varchar varint time time",
"QueryType": "1"
}
Expand All @@ -115,7 +115,7 @@
"TokenValues": "",
"Query": "SELECT * FROM ks1.pkAll_ckAll_colAll_mvNp.cck1_mv_1 WHERE col1=? AND pk0=? AND pk1=? AND pk2=? AND pk3=? AND pk4=? AND pk5=? AND pk6=? AND pk7=? AND pk8=? AND pk9=? AND pk10=? AND pk11=? AND pk12=? AND pk13=? AND pk14=? AND pk15=? AND pk16=? AND pk17=? AND pk18=? AND ck0\u003e? AND ck0\u003c?",
"Names": "[col1 pk0 pk1 pk2 pk3 pk4 pk5 pk6 pk7 pk8 pk9 pk10 pk11 pk12 pk13 pk14 pk15 pk16 pk17 pk18 ck0 ck0]",
"Values": "[01 01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 0 00 1 00000001-0000-1000-8000-3132372e302e 0 00000001-0000-1000-8000-3132372e302e 00 1 1 00 00]",
"Values": "[01 01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 1 00 1 00000001-0000-1000-8000-3132372e302e 1 00000001-0000-1000-8000-3132372e302e 00 1 1 00 00]",
"Types": " ascii ascii bigint blob boolean date decimal double float inet int smallint text timestamp timeuuid tinyint uuid varchar varint time ascii ascii",
"QueryType": "1"
}
Expand All @@ -126,7 +126,7 @@
"TokenValues": "",
"Query": "SELECT * FROM ks1.pkAll_ckAll_colAll_mvNp.cckAll_mv_1 WHERE col1=? AND pk0=? AND pk1=? AND pk2=? AND pk3=? AND pk4=? AND pk5=? AND pk6=? AND pk7=? AND pk8=? AND pk9=? AND pk10=? AND pk11=? AND pk12=? AND pk13=? AND pk14=? AND pk15=? AND pk16=? AND pk17=? AND pk18=? AND ck0=? AND ck1=? AND ck2=? AND ck3=? AND ck4=? AND ck5=? AND ck6=? AND ck7=? AND ck8=? AND ck9=? AND ck10=? AND ck11=? AND ck12=? AND ck13=? AND ck14=? AND ck15=? AND ck16=? AND ck17=? AND ck18\u003e? AND ck18\u003c?",
"Names": "[col1 pk0 pk1 pk2 pk3 pk4 pk5 pk6 pk7 pk8 pk9 pk10 pk11 pk12 pk13 pk14 pk15 pk16 pk17 pk18 ck0 ck1 ck2 ck3 ck4 ck5 ck6 ck7 ck8 ck9 ck10 ck11 ck12 ck13 ck14 ck15 ck16 ck17 ck18 ck18]",
"Values": "[01 01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 0 00 1 00000001-0000-1000-8000-3132372e302e 0 00000001-0000-1000-8000-3132372e302e 00 1 1 00 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 0 00 1 00000001-0000-1000-8000-3132372e302e 0 00000001-0000-1000-8000-3132372e302e 01 1 1 1]",
"Values": "[01 01 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 1 00 1 00000001-0000-1000-8000-3132372e302e 1 00000001-0000-1000-8000-3132372e302e 00 1 1 00 1 3030 false 1970-01-01 0.001 1.1102230246251565e-16 1.110223e-16 1.1.1.1 0 1 00 1 00000001-0000-1000-8000-3132372e302e 1 00000001-0000-1000-8000-3132372e302e 01 1 1 1]",
"Types": " ascii ascii bigint blob boolean date decimal double float inet int smallint text timestamp timeuuid tinyint uuid varchar varint time ascii bigint blob boolean date decimal double float inet int smallint text timestamp timeuuid tinyint uuid varchar varint time time",
"QueryType": "1"
}
Expand Down
Loading

0 comments on commit bb5f54c

Please sign in to comment.