Skip to content

Commit

Permalink
schema: data sizes are configurable
Browse files Browse the repository at this point in the history
The data sizes are configurable through the SchemaConfig object.
It propagates most of the config through to the runtime during the
initial schema generation. However, the sizes of actual data such as strings
and blobs are carried through to the GenValue functions via the
PartitionRange objects.

The default dataset size is "large", which is suitable for a real large-scale
testing setup, while the development scripts use the new
"dataset-size" switch to set it to "small" during development.
  • Loading branch information
Henrik Johansson committed Jun 20, 2019
1 parent 4681919 commit ac43aea
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 60 deletions.
67 changes: 55 additions & 12 deletions cmd/gemini/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ var (
maxPartitionKeys int
maxClusteringKeys int
maxColumns int
datasetSize string
)

const (
Expand Down Expand Up @@ -184,6 +185,7 @@ func run(cmd *cobra.Command, args []string) {
}
defer outFile.Sync()

schemaConfig := createSchemaConfig()
var schema *gemini.Schema
if len(schemaFile) > 0 {
var err error
Expand All @@ -193,13 +195,7 @@ func run(cmd *cobra.Command, args []string) {
return
}
} else {
sc := gemini.SchemaConfig{
CompactionStrategy: getCompactionStrategy(compactionStrategy),
MaxPartitionKeys: maxPartitionKeys,
MaxClusteringKeys: maxClusteringKeys,
MaxColumns: maxColumns,
}
schema = gemini.GenSchema(sc)
schema = gemini.GenSchema(schemaConfig)
}

jsonSchema, _ := json.MarshalIndent(schema, "", " ")
Expand Down Expand Up @@ -230,7 +226,49 @@ func run(cmd *cobra.Command, args []string) {
}
}

runJob(Job, schema, store, mode, outFile)
runJob(Job, schema, schemaConfig, store, mode, outFile)
}

// createSchemaConfig builds the SchemaConfig used for schema generation and
// runtime data sizing, honoring the --dataset-size flag.
//
// It starts from the "large" defaults and, for "small", shrinks only the
// data-size knobs (UDT/tuple parts, blob/string lengths). Starting from the
// default config — instead of copying fields one by one — guarantees that
// every SchemaConfig field (including any added later) keeps its default
// unless explicitly overridden here.
func createSchemaConfig() *gemini.SchemaConfig {
	cfg := createDefaultSchemaConfig()
	if strings.ToLower(datasetSize) == "small" {
		cfg.MaxUDTParts = 2
		cfg.MaxTupleParts = 2
		cfg.MaxBlobLength = 20
		cfg.MaxStringLength = 20
	}
	// Any value other than "small" (including the default "large") uses the
	// unmodified defaults.
	return cfg
}

// createDefaultSchemaConfig returns the "large" dataset configuration.
// Structural limits (partition keys, clustering keys, columns, compaction
// strategy) come from the corresponding CLI flags; the data-size limits
// below are the generous defaults suitable for full-scale test runs.
func createDefaultSchemaConfig() *gemini.SchemaConfig {
	cfg := &gemini.SchemaConfig{
		CompactionStrategy: getCompactionStrategy(compactionStrategy),
		MaxPartitionKeys:   maxPartitionKeys,
		MaxClusteringKeys:  maxClusteringKeys,
		MaxColumns:         maxColumns,
		MaxUDTParts:        20,    // fields per generated UDT
		MaxTupleParts:      20,    // elements per generated tuple
		MaxBlobLength:      10000, // upper bound for blob payloads
		MinBlobLength:      0,
		MaxStringLength:    1000, // upper bound for text/varchar values
		MinStringLength:    0,
	}
	return cfg
}

func createClusters(consistency gocql.Consistency) (*gocql.ClusterConfig, *gocql.ClusterConfig) {
Expand Down Expand Up @@ -270,7 +308,7 @@ func getCompactionStrategy(cs string) *gemini.CompactionStrategy {
}
}

func runJob(f testJob, schema *gemini.Schema, s store.Store, mode string, out *os.File) {
func runJob(f testJob, schema *gemini.Schema, schemaConfig *gemini.SchemaConfig, s store.Store, mode string, out *os.File) {
defer out.Sync()
c := make(chan Status, 10000)
minRange := 0
Expand All @@ -290,9 +328,13 @@ func runJob(f testJob, schema *gemini.Schema, s store.Store, mode string, out *o
for _, table := range schema.Tables {
for i := 0; i < concurrency; i++ {
p := gemini.PartitionRange{
Min: minRange + i*maxRange,
Max: maxRange + i*maxRange,
Rand: rand.New(rand.NewSource(int64(seed))),
Min: minRange + i*maxRange,
Max: maxRange + i*maxRange,
Rand: rand.New(rand.NewSource(int64(seed))),
MaxBlobLength: schemaConfig.MaxBlobLength,
MinBlobLength: schemaConfig.MinBlobLength,
MaxStringLength: schemaConfig.MaxStringLength,
MinStringLength: schemaConfig.MinStringLength,
}
go f(workerCtx, pump.ch, &workers, schema, table, s, p, c, mode, out, warmup)
}
Expand Down Expand Up @@ -474,6 +516,7 @@ func init() {
rootCmd.Flags().IntVarP(&maxPartitionKeys, "max-partition-keys", "", 2, "Maximum number of generated partition keys")
rootCmd.Flags().IntVarP(&maxClusteringKeys, "max-clustering-keys", "", 4, "Maximum number of generated clustering keys")
rootCmd.Flags().IntVarP(&maxColumns, "max-columns", "", 16, "Maximum number of generated columns")
rootCmd.Flags().StringVarP(&datasetSize, "dataset-size", "", "large", "Specify the type of dataset size to use, small|large")
}

func printSetup() error {
Expand Down
20 changes: 15 additions & 5 deletions schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ type SchemaConfig struct {
MaxPartitionKeys int
MaxClusteringKeys int
MaxColumns int
MaxUDTParts int
MaxTupleParts int
MaxBlobLength int
MaxStringLength int
MinBlobLength int
MinStringLength int
}

type Keyspace struct {
Expand Down Expand Up @@ -178,9 +184,13 @@ type Schema struct {
}

type PartitionRange struct {
Min int `default:0`
Max int `default:100`
Rand *rand.Rand
Min int `default:0`
Max int `default:100`
Rand *rand.Rand
MaxBlobLength int
MinBlobLength int
MaxStringLength int
MinStringLength int
}

func (s *Schema) GetDropSchema() []string {
Expand All @@ -189,7 +199,7 @@ func (s *Schema) GetDropSchema() []string {
}
}

func GenSchema(sc SchemaConfig) *Schema {
func GenSchema(sc *SchemaConfig) *Schema {
builder := NewSchemaBuilder()
keyspace := Keyspace{
Name: "ks1",
Expand All @@ -208,7 +218,7 @@ func GenSchema(sc SchemaConfig) *Schema {
var columns []ColumnDef
numColumns := rand.Intn(sc.MaxColumns)
for i := 0; i < numColumns; i++ {
columns = append(columns, ColumnDef{Name: genColumnName("col", i), Type: genColumnType(numColumns)})
columns = append(columns, ColumnDef{Name: genColumnName("col", i), Type: genColumnType(numColumns, sc)})
}
var indexes []IndexDef
if numColumns > 0 {
Expand Down
1 change: 1 addition & 0 deletions scripts/gemini-launcher
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ until docker logs ${TEST_NAME} | grep "Starting listening for CQL clients" > /de
$GEMINI_CMD \
--duration=10m \
--fail-fast \
--dataset-size=small \
--test-cluster=${TEST_IP} \
--oracle-cluster=${ORACLE_IP} \
"$@"
Expand Down
57 changes: 25 additions & 32 deletions types.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,6 @@ const (
TYPE_UUID = SimpleType("uuid")
TYPE_VARCHAR = SimpleType("varchar")
TYPE_VARINT = SimpleType("varint")

MaxBlobLength = 1e4
MinBlobLength = 0
MaxStringLength = 1000
MinStringLength = 0
MaxTupleParts = 20
MaxUDTParts = 20
)

// TODO: Add support for time when gocql bug is fixed.
Expand Down Expand Up @@ -117,10 +110,10 @@ func (st SimpleType) GenValue(p *PartitionRange) []interface{} {
var val interface{}
switch st {
case TYPE_ASCII, TYPE_TEXT, TYPE_VARCHAR:
ln := p.Rand.Intn(MaxStringLength) + MinStringLength
ln := p.Rand.Intn(p.MaxStringLength) + p.MinStringLength
val = randStringWithTime(p.Rand, ln, randTime(p.Rand))
case TYPE_BLOB:
ln := p.Rand.Intn(MaxBlobLength) + MinBlobLength
ln := p.Rand.Intn(p.MaxBlobLength) + p.MinBlobLength
val = hex.EncodeToString([]byte(randStringWithTime(p.Rand, ln, randTime(p.Rand))))
case TYPE_BIGINT:
val = p.Rand.Int63()
Expand Down Expand Up @@ -517,50 +510,50 @@ func genColumnName(prefix string, idx int) string {
return fmt.Sprintf("%s%d", prefix, idx)
}

func genColumnType(numColumns int) Type {
func genColumnType(numColumns int, sc *SchemaConfig) Type {
n := rand.Intn(numColumns + 5)
switch n {
case numColumns:
return genTupleType()
return genTupleType(sc)
case numColumns + 1:
return genUDTType()
return genUDTType(sc)
case numColumns + 2:
return genSetType()
return genSetType(sc)
case numColumns + 3:
return genListType()
return genListType(sc)
case numColumns + 4:
return genMapType()
return genMapType(sc)
default:
return genSimpleType()
return genSimpleType(sc)
}
}

func genSimpleType() SimpleType {
func genSimpleType(sc *SchemaConfig) SimpleType {
return types[rand.Intn(len(types))]
}

func genTupleType() Type {
n := rand.Intn(MaxTupleParts)
func genTupleType(sc *SchemaConfig) Type {
n := rand.Intn(sc.MaxTupleParts)
if n < 2 {
n = 2
}
typeList := make([]SimpleType, n, n)
for i := 0; i < n; i++ {
typeList[i] = genSimpleType()
typeList[i] = genSimpleType(sc)
}
return TupleType{
Types: typeList,
Frozen: rand.Uint32()%2 == 0,
}
}

func genUDTType() UDTType {
func genUDTType(sc *SchemaConfig) UDTType {
udtNum := rand.Uint32()
typeName := fmt.Sprintf("udt_%d", udtNum)
ts := make(map[string]SimpleType)

for i := 0; i < rand.Intn(MaxUDTParts)+1; i++ {
ts[typeName+fmt.Sprintf("_%d", i)] = genSimpleType()
for i := 0; i < rand.Intn(sc.MaxUDTParts)+1; i++ {
ts[typeName+fmt.Sprintf("_%d", i)] = genSimpleType(sc)
}

return UDTType{
Expand All @@ -570,18 +563,18 @@ func genUDTType() UDTType {
}
}

func genSetType() BagType {
return genBagType("set")
func genSetType(sc *SchemaConfig) BagType {
return genBagType("set", sc)
}

func genListType() BagType {
return genBagType("list")
func genListType(sc *SchemaConfig) BagType {
return genBagType("list", sc)
}

func genBagType(kind string) BagType {
func genBagType(kind string, sc *SchemaConfig) BagType {
var t SimpleType
for {
t = genSimpleType()
t = genSimpleType(sc)
if t != TYPE_DURATION {
break
}
Expand All @@ -593,17 +586,17 @@ func genBagType(kind string) BagType {
}
}

func genMapType() MapType {
func genMapType(sc *SchemaConfig) MapType {
var t SimpleType
for {
t = genSimpleType()
t = genSimpleType(sc)
if t != TYPE_DURATION {
break
}
}
return MapType{
KeyType: t,
ValueType: genSimpleType(),
ValueType: genSimpleType(sc),
Frozen: rand.Uint32()%2 == 0,
}
}
Expand Down
26 changes: 15 additions & 11 deletions types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,26 +211,30 @@ func TestCQLPretty(t *testing.T) {
}

func TestMarshalUnmarshal(t *testing.T) {
sc := &SchemaConfig{
MaxTupleParts: 2,
MaxUDTParts: 2,
}
columns := Columns{
{
Name: genColumnName("col", 0),
Type: genMapType(),
Type: genMapType(sc),
},
{
Name: genColumnName("col", 1),
Type: genSetType(),
Type: genSetType(sc),
},
{
Name: genColumnName("col", 2),
Type: genListType(),
Type: genListType(sc),
},
{
Name: genColumnName("col", 3),
Type: genTupleType(),
Type: genTupleType(sc),
},
{
Name: genColumnName("col", 4),
Type: genUDTType(),
Type: genUDTType(sc),
},
}
s1 := &Schema{
Expand All @@ -240,13 +244,13 @@ func TestMarshalUnmarshal(t *testing.T) {
PartitionKeys: Columns{
{
Name: genColumnName("pk", 0),
Type: genSimpleType(),
Type: genSimpleType(sc),
},
},
ClusteringKeys: Columns{
{
Name: genColumnName("ck", 0),
Type: genSimpleType(),
Type: genSimpleType(sc),
},
},
Columns: columns,
Expand All @@ -266,21 +270,21 @@ func TestMarshalUnmarshal(t *testing.T) {
PartitionKeys: []ColumnDef{
{
Name: "pk_mv_0",
Type: genListType(),
Type: genListType(sc),
},
{
Name: "pk_mv_1",
Type: genTupleType(),
Type: genTupleType(sc),
},
},
ClusteringKeys: []ColumnDef{
{
Name: "ck_mv_0",
Type: genSetType(),
Type: genSetType(sc),
},
{
Name: "ck_mv_1",
Type: genUDTType(),
Type: genUDTType(sc),
},
},
},
Expand Down

0 comments on commit ac43aea

Please sign in to comment.