Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase data sizes #138

Merged
merged 7 commits into from
Jun 17, 2019
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion cmd/gemini/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ var (
warmup time.Duration
compactionStrategy string
consistency string
maxPartitionKeys int
maxClusteringKeys int
maxColumns int
)

const (
Expand Down Expand Up @@ -192,7 +195,7 @@ func run(cmd *cobra.Command, args []string) {
return
}
} else {
schema = gemini.GenSchema(getCompactionStrategy(compactionStrategy))
schema = gemini.GenSchema(getCompactionStrategy(compactionStrategy), maxPartitionKeys, maxClusteringKeys, maxColumns)
}

jsonSchema, _ := json.MarshalIndent(schema, "", " ")
Expand Down Expand Up @@ -476,6 +479,9 @@ func init() {
rootCmd.Flags().DurationVarP(&warmup, "warmup", "", 30*time.Second, "Specify the warmup perid as a duration for example 30s or 10h")
rootCmd.Flags().StringVarP(&compactionStrategy, "compaction-strategy", "", "", "Specify the desired CS as either the coded short hand stcs|twcs|lcs to get the default for each type or provide the entire specification in the form {'class':'....'}")
rootCmd.Flags().StringVarP(&consistency, "consistency", "", "QUORUM", "Specify the desired consistency as ANY|ONE|TWO|THREE|QUORUM|LOCAL_QUORUM|EACH_QUORUM|LOCAL_ONE")
rootCmd.Flags().IntVarP(&maxPartitionKeys, "max-partition-keys", "", 2, "Maximum number of generated partition keys")
rootCmd.Flags().IntVarP(&maxClusteringKeys, "max-clustering-keys", "", 4, "Maximum number of generated clustering keys")
rootCmd.Flags().IntVarP(&maxColumns, "max-columns", "", 16, "Maximum number of generated columns")
}

func printSetup() error {
Expand Down
34 changes: 19 additions & 15 deletions schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"strconv"
"strings"

"github.com/pkg/errors"
"github.com/scylladb/gocqlx/qb"
)

Expand Down Expand Up @@ -181,30 +182,24 @@ func (s *Schema) GetDropSchema() []string {
}
}

const (
MaxPartitionKeys = 2
MaxClusteringKeys = 4
MaxColumns = 16
)

func GenSchema(cs *CompactionStrategy) *Schema {
func GenSchema(cs *CompactionStrategy, maxPartitionKeys, maxClusteringKeys, maxColumns int) *Schema {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could wrap these configuration parameters in a SchemaConfig struct in a follow-up patch.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. While it opens up significant refactoring possibilities, it also allows the developer to "forget to apply" a certain config, but that can happen with an argument as well, I guess.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can do it now. It's a quick fix and makes it much nicer.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SchemaConfig added.

builder := NewSchemaBuilder()
keyspace := Keyspace{
Name: "ks1",
}
builder.Keyspace(keyspace)
var partitionKeys []ColumnDef
numPartitionKeys := rand.Intn(MaxPartitionKeys-1) + 1
numPartitionKeys := rand.Intn(maxPartitionKeys-1) + 1
for i := 0; i < numPartitionKeys; i++ {
partitionKeys = append(partitionKeys, ColumnDef{Name: genColumnName("pk", i), Type: TYPE_INT})
}
var clusteringKeys []ColumnDef
numClusteringKeys := rand.Intn(MaxClusteringKeys)
numClusteringKeys := rand.Intn(maxClusteringKeys)
for i := 0; i < numClusteringKeys; i++ {
clusteringKeys = append(clusteringKeys, ColumnDef{Name: genColumnName("ck", i), Type: genPrimaryKeyColumnType()})
}
var columns []ColumnDef
numColumns := rand.Intn(MaxColumns)
numColumns := rand.Intn(maxColumns)
for i := 0; i < numColumns; i++ {
columns = append(columns, ColumnDef{Name: genColumnName("col", i), Type: genColumnType(numColumns)})
}
Expand All @@ -217,7 +212,7 @@ func GenSchema(cs *CompactionStrategy) *Schema {
}
}
}
validMVColumn := func() ColumnDef {
validMVColumn := func() (ColumnDef, error) {
validCols := make([]ColumnDef, 0, len(columns))
for _, col := range columns {
valid := false
Expand All @@ -231,13 +226,22 @@ func GenSchema(cs *CompactionStrategy) *Schema {
validCols = append(validCols, col)
}
}
return validCols[rand.Intn(len(validCols))]
if len(validCols) == 0 {
return ColumnDef{}, errors.New("no valid MV columns found")
}
return validCols[rand.Intn(len(validCols))], nil
}
var mvs []MaterializedView
numMvs := 1
for i := 0; i < numMvs; i++ {
col, err := validMVColumn()
if err != nil {
fmt.Println(err)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps something more structured for error reporting?

Copy link
Contributor Author

@dahankzter dahankzter Jun 17, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what to do in this case though. It simply means that we didn't generate a proper column that can take part in the MV.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better message added.

continue
}

cols := []ColumnDef{
validMVColumn(),
col,
}
mv := MaterializedView{
Name: "table1_mv_" + strconv.Itoa(i),
Expand Down Expand Up @@ -519,8 +523,8 @@ func (s *Schema) genClusteringRangeQuery(t *Table, p *PartitionRange) *Stmt {
tableName := t.Name
partitionKeys := t.PartitionKeys
clusteringKeys := t.ClusteringKeys
view := p.Rand.Intn(len(t.MaterializedViews))
if len(t.MaterializedViews) > 0 && p.Rand.Int()%2 == 0 {
view := p.Rand.Intn(len(t.MaterializedViews))
tableName = t.MaterializedViews[view].Name
partitionKeys = t.MaterializedViews[view].PartitionKeys
clusteringKeys = t.MaterializedViews[view].ClusteringKeys
Expand Down Expand Up @@ -560,8 +564,8 @@ func (s *Schema) genMultiplePartitionClusteringRangeQuery(t *Table, p *Partition
tableName := t.Name
partitionKeys := t.PartitionKeys
clusteringKeys := t.ClusteringKeys
view := p.Rand.Intn(len(t.MaterializedViews))
if len(t.MaterializedViews) > 0 && p.Rand.Int()%2 == 0 {
view := p.Rand.Intn(len(t.MaterializedViews))
tableName = t.MaterializedViews[view].Name
partitionKeys = t.MaterializedViews[view].PartitionKeys
clusteringKeys = t.MaterializedViews[view].ClusteringKeys
Expand Down
13 changes: 9 additions & 4 deletions types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ const (
TYPE_VARCHAR = SimpleType("varchar")
TYPE_VARINT = SimpleType("varint")

MaxUDTParts = 10
MaxStringLength = 1000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This limits the maximum blob size too, no? We should allow much larger blobs (for example, 1 MB or even 10 MB).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes I can add a separate one for blobs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's 1e6 bytes now; perhaps that is enough to start with?

MinStringLength = 100
MaxTupleParts = 20
MaxUDTParts = 20
)

// TODO: Add support for time when gocql bug is fixed.
Expand Down Expand Up @@ -109,9 +112,11 @@ func (st SimpleType) GenValue(p *PartitionRange) []interface{} {
var val interface{}
switch st {
case TYPE_ASCII, TYPE_TEXT, TYPE_VARCHAR:
val = randStringWithTime(p.Rand, nonEmptyRandIntRange(p.Rand, p.Max, p.Max, 10), randTime(p.Rand))
ln := p.Rand.Intn(MaxStringLength) + MinStringLength
val = randStringWithTime(p.Rand, ln, randTime(p.Rand))
case TYPE_BLOB:
val = hex.EncodeToString([]byte(randStringWithTime(p.Rand, nonEmptyRandIntRange(p.Rand, p.Max, p.Max, 10), randTime(p.Rand))))
ln := p.Rand.Intn(MaxStringLength) + MinStringLength
val = hex.EncodeToString([]byte(randStringWithTime(p.Rand, ln, randTime(p.Rand))))
case TYPE_BIGINT:
val = p.Rand.Int63()
case TYPE_BOOLEAN:
Expand Down Expand Up @@ -530,7 +535,7 @@ func genSimpleType() SimpleType {
}

func genTupleType() Type {
n := rand.Intn(5)
n := rand.Intn(MaxTupleParts)
if n < 2 {
n = 2
}
Expand Down