Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase data sizes #138

Merged
merged 7 commits into from
Jun 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion cmd/gemini/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ var (
warmup time.Duration
compactionStrategy string
consistency string
maxPartitionKeys int
maxClusteringKeys int
maxColumns int
)

const (
Expand Down Expand Up @@ -192,7 +195,13 @@ func run(cmd *cobra.Command, args []string) {
return
}
} else {
schema = gemini.GenSchema(getCompactionStrategy(compactionStrategy))
sc := gemini.SchemaConfig{
CompactionStrategy: getCompactionStrategy(compactionStrategy),
MaxPartitionKeys: maxPartitionKeys,
MaxClusteringKeys: maxClusteringKeys,
MaxColumns: maxColumns,
}
schema = gemini.GenSchema(sc)
}

jsonSchema, _ := json.MarshalIndent(schema, "", " ")
Expand Down Expand Up @@ -476,6 +485,9 @@ func init() {
rootCmd.Flags().DurationVarP(&warmup, "warmup", "", 30*time.Second, "Specify the warmup perid as a duration for example 30s or 10h")
rootCmd.Flags().StringVarP(&compactionStrategy, "compaction-strategy", "", "", "Specify the desired CS as either the coded short hand stcs|twcs|lcs to get the default for each type or provide the entire specification in the form {'class':'....'}")
rootCmd.Flags().StringVarP(&consistency, "consistency", "", "QUORUM", "Specify the desired consistency as ANY|ONE|TWO|THREE|QUORUM|LOCAL_QUORUM|EACH_QUORUM|LOCAL_ONE")
rootCmd.Flags().IntVarP(&maxPartitionKeys, "max-partition-keys", "", 2, "Maximum number of generated partition keys")
rootCmd.Flags().IntVarP(&maxClusteringKeys, "max-clustering-keys", "", 4, "Maximum number of generated clustering keys")
rootCmd.Flags().IntVarP(&maxColumns, "max-columns", "", 16, "Maximum number of generated columns")
}

func printSetup() error {
Expand Down
27 changes: 27 additions & 0 deletions datautils.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package gemini

import (
"bytes"
"encoding/base64"
"fmt"
"math/rand"
Expand Down Expand Up @@ -55,6 +56,25 @@ func nonEmptyRandFloat64Range(rnd *rand.Rand, min float64, max float64, def floa
return randFloat64Range(rnd, 1, def)
}

// randBlobWithTime builds a byte slice of exactly len bytes.
//
// The slice starts with the bytes of a ksuid obtained from
// ksuid.NewRandomWithTime(t); the error returned by that call is discarded.
// When those bytes are fewer than len, the remainder is filled with the
// standard base64 text of random bytes read from rnd, and the result is cut
// down to len. When they already cover len, rnd is not consulted at all.
//
// NOTE(review): a negative len would make the final slice expression panic;
// callers are assumed to pass len >= 0 — confirm.
func randBlobWithTime(rnd *rand.Rand, len int, t time.Time) []byte {
	id, _ := ksuid.NewRandomWithTime(t)

	var blob bytes.Buffer
	blob.Write(id.Bytes())

	// Only pad when the ksuid bytes alone are too short. The base64 text of
	// `missing` random bytes is never shorter than `missing` characters, so
	// the truncation below always has enough data to work with.
	if missing := len - blob.Len(); missing > 0 {
		padding := make([]byte, missing)
		rnd.Read(padding)
		blob.WriteString(base64.StdEncoding.EncodeToString(padding))
	}

	return blob.Bytes()[:len]
}

func randStringWithTime(rnd *rand.Rand, len int, t time.Time) string {
id, _ := ksuid.NewRandomWithTime(t)

Expand All @@ -72,6 +92,13 @@ func randStringWithTime(rnd *rand.Rand, len int, t time.Time) string {
return buf.String()[:len]
}

// nonEmptyRandBlobWithTime behaves like randBlobWithTime but never asks for
// fewer than one byte: any len that is zero or negative is replaced by 1
// before delegating.
func nonEmptyRandBlobWithTime(rnd *rand.Rand, len int, t time.Time) []byte {
	size := len
	if size < 1 {
		size = 1
	}
	return randBlobWithTime(rnd, size, t)
}

func nonEmptyRandStringWithTime(rnd *rand.Rand, len int, t time.Time) string {
if len <= 0 {
len = 1
Expand Down
58 changes: 35 additions & 23 deletions schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,21 @@ import (
"strconv"
"strings"

"github.com/pkg/errors"
"github.com/scylladb/gocqlx/qb"
)

const (
// KnownIssuesJsonWithTuples is the URL of Scylla issue #3708. GenSchema
// registers it as a key (set to true) in the generated table's KnownIssues map.
// NOTE(review): judging by the name, the issue concerns JSON handling of
// tuples — confirm against the linked issue.
KnownIssuesJsonWithTuples = "https://github.com/scylladb/scylla/issues/3708"
)

// SchemaConfig groups the settings GenSchema reads when it generates a
// random schema.
type SchemaConfig struct {
// CompactionStrategy is used for the generated table. When it is nil,
// GenSchema falls back to randomCompactionStrategy().
CompactionStrategy *CompactionStrategy
// MaxPartitionKeys bounds the number of partition keys: GenSchema draws
// rand.Intn(MaxPartitionKeys-1) + 1, i.e. a count in [1, MaxPartitionKeys-1].
// rand.Intn panics for arguments <= 0, so the value must be at least 2.
MaxPartitionKeys int
// MaxClusteringKeys bounds the number of clustering keys: GenSchema draws
// rand.Intn(MaxClusteringKeys), i.e. a count in [0, MaxClusteringKeys-1].
// The value must be at least 1 or rand.Intn panics.
MaxClusteringKeys int
// MaxColumns bounds the number of regular columns: GenSchema draws
// rand.Intn(MaxColumns), i.e. a count in [0, MaxColumns-1].
// The value must be at least 1 or rand.Intn panics.
MaxColumns int
}

// Keyspace describes a keyspace by its name.
type Keyspace struct {
// Name is the keyspace name; it is serialized to JSON under the "name" key.
Name string `json:"name"`
}
Expand Down Expand Up @@ -181,30 +189,24 @@ func (s *Schema) GetDropSchema() []string {
}
}

const (
MaxPartitionKeys = 2
MaxClusteringKeys = 4
MaxColumns = 16
)

func GenSchema(cs *CompactionStrategy) *Schema {
func GenSchema(sc SchemaConfig) *Schema {
builder := NewSchemaBuilder()
keyspace := Keyspace{
Name: "ks1",
}
builder.Keyspace(keyspace)
var partitionKeys []ColumnDef
numPartitionKeys := rand.Intn(MaxPartitionKeys-1) + 1
numPartitionKeys := rand.Intn(sc.MaxPartitionKeys-1) + 1
for i := 0; i < numPartitionKeys; i++ {
partitionKeys = append(partitionKeys, ColumnDef{Name: genColumnName("pk", i), Type: TYPE_INT})
}
var clusteringKeys []ColumnDef
numClusteringKeys := rand.Intn(MaxClusteringKeys)
numClusteringKeys := rand.Intn(sc.MaxClusteringKeys)
for i := 0; i < numClusteringKeys; i++ {
clusteringKeys = append(clusteringKeys, ColumnDef{Name: genColumnName("ck", i), Type: genPrimaryKeyColumnType()})
}
var columns []ColumnDef
numColumns := rand.Intn(MaxColumns)
numColumns := rand.Intn(sc.MaxColumns)
for i := 0; i < numColumns; i++ {
columns = append(columns, ColumnDef{Name: genColumnName("col", i), Type: genColumnType(numColumns)})
}
Expand All @@ -217,7 +219,7 @@ func GenSchema(cs *CompactionStrategy) *Schema {
}
}
}
validMVColumn := func() ColumnDef {
validMVColumn := func() (ColumnDef, error) {
validCols := make([]ColumnDef, 0, len(columns))
for _, col := range columns {
valid := false
Expand All @@ -231,13 +233,22 @@ func GenSchema(cs *CompactionStrategy) *Schema {
validCols = append(validCols, col)
}
}
return validCols[rand.Intn(len(validCols))]
if len(validCols) == 0 {
return ColumnDef{}, errors.New("no valid MV columns found")
}
return validCols[rand.Intn(len(validCols))], nil
}
var mvs []MaterializedView
numMvs := 1
for i := 0; i < numMvs; i++ {
col, err := validMVColumn()
if err != nil {
fmt.Printf("unable to generate valid columns for materialized view, error=%s", err)
continue
}

cols := []ColumnDef{
validMVColumn(),
col,
}
mv := MaterializedView{
Name: "table1_mv_" + strconv.Itoa(i),
Expand All @@ -248,19 +259,20 @@ func GenSchema(cs *CompactionStrategy) *Schema {
}

table := Table{
Name: "table1",
PartitionKeys: partitionKeys,
ClusteringKeys: clusteringKeys,
Columns: columns,
CompactionStrategy: cs,
MaterializedViews: mvs,
Indexes: indexes,
Name: "table1",
PartitionKeys: partitionKeys,
ClusteringKeys: clusteringKeys,
Columns: columns,
MaterializedViews: mvs,
Indexes: indexes,
KnownIssues: map[string]bool{
KnownIssuesJsonWithTuples: true,
},
}
if cs == nil {
if sc.CompactionStrategy == nil {
table.CompactionStrategy = randomCompactionStrategy()
} else {
table.CompactionStrategy = &(*sc.CompactionStrategy)
}

builder.Table(&table)
Expand Down Expand Up @@ -519,8 +531,8 @@ func (s *Schema) genClusteringRangeQuery(t *Table, p *PartitionRange) *Stmt {
tableName := t.Name
partitionKeys := t.PartitionKeys
clusteringKeys := t.ClusteringKeys
view := p.Rand.Intn(len(t.MaterializedViews))
if len(t.MaterializedViews) > 0 && p.Rand.Int()%2 == 0 {
view := p.Rand.Intn(len(t.MaterializedViews))
tableName = t.MaterializedViews[view].Name
partitionKeys = t.MaterializedViews[view].PartitionKeys
clusteringKeys = t.MaterializedViews[view].ClusteringKeys
Expand Down Expand Up @@ -560,8 +572,8 @@ func (s *Schema) genMultiplePartitionClusteringRangeQuery(t *Table, p *Partition
tableName := t.Name
partitionKeys := t.PartitionKeys
clusteringKeys := t.ClusteringKeys
view := p.Rand.Intn(len(t.MaterializedViews))
if len(t.MaterializedViews) > 0 && p.Rand.Int()%2 == 0 {
view := p.Rand.Intn(len(t.MaterializedViews))
tableName = t.MaterializedViews[view].Name
partitionKeys = t.MaterializedViews[view].PartitionKeys
clusteringKeys = t.MaterializedViews[view].ClusteringKeys
Expand Down
19 changes: 13 additions & 6 deletions types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ const (
TYPE_VARCHAR = SimpleType("varchar")
TYPE_VARINT = SimpleType("varint")

MaxUDTParts = 10
MaxBlobLength = 1e6
MinBlobLength = 1000
MaxStringLength = 1000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This limits blob maximum size too, no? We should allow much larger blobs (for example, 1 MB or even 10 MB).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, I can add a separate one for blobs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's 1e6 bytes now; perhaps that's enough to start with?

MinStringLength = 100
MaxTupleParts = 20
MaxUDTParts = 20
)

// TODO: Add support for time when gocql bug is fixed.
Expand Down Expand Up @@ -109,9 +114,11 @@ func (st SimpleType) GenValue(p *PartitionRange) []interface{} {
var val interface{}
switch st {
case TYPE_ASCII, TYPE_TEXT, TYPE_VARCHAR:
val = randStringWithTime(p.Rand, nonEmptyRandIntRange(p.Rand, p.Max, p.Max, 10), randTime(p.Rand))
ln := p.Rand.Intn(MaxStringLength) + MinStringLength
val = randStringWithTime(p.Rand, ln, randTime(p.Rand))
case TYPE_BLOB:
val = hex.EncodeToString([]byte(randStringWithTime(p.Rand, nonEmptyRandIntRange(p.Rand, p.Max, p.Max, 10), randTime(p.Rand))))
ln := p.Rand.Intn(MaxBlobLength) + MinBlobLength
val = hex.EncodeToString([]byte(randStringWithTime(p.Rand, ln, randTime(p.Rand))))
case TYPE_BIGINT:
val = p.Rand.Int63()
case TYPE_BOOLEAN:
Expand Down Expand Up @@ -165,8 +172,8 @@ func (st SimpleType) GenValueRange(p *PartitionRange) ([]interface{}, []interfac
startTime := randTime(p.Rand)
start := nonEmptyRandIntRange(p.Rand, p.Min, p.Max, 10)
end := start + nonEmptyRandIntRange(p.Rand, p.Min, p.Max, 10)
left = hex.EncodeToString([]byte(nonEmptyRandStringWithTime(p.Rand, start, startTime)))
right = hex.EncodeToString([]byte(nonEmptyRandStringWithTime(p.Rand, end, randTimeNewer(p.Rand, startTime))))
left = hex.EncodeToString(nonEmptyRandBlobWithTime(p.Rand, start, startTime))
right = hex.EncodeToString(nonEmptyRandBlobWithTime(p.Rand, end, randTimeNewer(p.Rand, startTime)))
case TYPE_BIGINT:
start := nonEmptyRandInt64Range(p.Rand, int64(p.Min), int64(p.Max), 10)
end := start + nonEmptyRandInt64Range(p.Rand, int64(p.Min), int64(p.Max), 10)
Expand Down Expand Up @@ -530,7 +537,7 @@ func genSimpleType() SimpleType {
}

func genTupleType() Type {
n := rand.Intn(5)
n := rand.Intn(MaxTupleParts)
if n < 2 {
n = 2
}
Expand Down