Skip to content

Commit

Permalink
workload: fix TPC-H generator to generate correct values for p_name
Browse files Browse the repository at this point in the history
The TPC-H generator creates values for p_name in the part table by selecting
5 elements from a list of words called randPartNames. It is supposed to select
5 random unique elements from the full list, but prior to this commit, it only
selected a permutation of the first 5 elements. This commit fixes the logic and
adds a unit test.

Epic: None
Release note: None
  • Loading branch information
rytaft committed Dec 16, 2022
1 parent 89a9bd1 commit 7c9dd1b
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 5 deletions.
2 changes: 2 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,7 @@ ALL_TESTS = [
"//pkg/workload/movr:movr_test",
"//pkg/workload/rand:rand_test",
"//pkg/workload/tpcc:tpcc_test",
"//pkg/workload/tpch:tpch_test",
"//pkg/workload/workloadimpl:workloadimpl_test",
"//pkg/workload/workloadsql:workloadsql_test",
"//pkg/workload/ycsb:ycsb_test",
Expand Down Expand Up @@ -2184,6 +2185,7 @@ GO_TARGETS = [
"//pkg/workload/tpccchecks:tpccchecks",
"//pkg/workload/tpcds:tpcds",
"//pkg/workload/tpch:tpch",
"//pkg/workload/tpch:tpch_test",
"//pkg/workload/ttlbench:ttlbench",
"//pkg/workload/ttllogger:ttllogger",
"//pkg/workload/workloadimpl:workloadimpl",
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/workloadccl/allccl/all_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ func TestDeterministicInitialData(t *testing.T) {
`sqlsmith`: 0xcbf29ce484222325,
`startrek`: 0xa0249fbdf612734c,
`tpcc`: 0xab32e4f5e899eb2f,
`tpch`: 0x65a1e18ddf4e59aa,
`tpch`: 0x72869878971e76ac,
`ycsb`: 0x1244ea1c29ef67f6,
}

Expand Down
15 changes: 14 additions & 1 deletion pkg/workload/tpch/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "tpch",
Expand Down Expand Up @@ -29,4 +29,17 @@ go_library(
],
)

go_test(
name = "tpch_test",
srcs = ["random_test.go"],
args = ["-test.timeout=295s"],
embed = [":tpch"],
deps = [
"//pkg/util/bufalloc",
"//pkg/util/timeutil",
"@com_github_stretchr_testify//assert",
"@org_golang_x_exp//rand",
],
)

get_x_data(name = "get_x_data")
5 changes: 2 additions & 3 deletions pkg/workload/tpch/random.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,8 @@ func randPartName(rng *rand.Rand, namePerm []int, a *bufalloc.ByteAllocator) []b
// do nPartNames iterations of rand.Perm, to get a random 5-subset of the
// indexes into randPartNames.
for i := 0; i < nPartNames; i++ {
j := rng.Intn(i + 1)
namePerm[i] = namePerm[j]
namePerm[j] = i
j := rng.Intn(len(namePerm))
namePerm[i], namePerm[j] = namePerm[j], namePerm[i]
}
var buf []byte
*a, buf = a.Alloc(maxPartNameLen*nPartNames+nPartNames, 0)
Expand Down
58 changes: 58 additions & 0 deletions pkg/workload/tpch/random_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright 2022 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package tpch

import (
"strings"
"testing"

"github.com/cockroachdb/cockroach/pkg/util/bufalloc"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/stretchr/testify/assert"
"golang.org/x/exp/rand"
)

func TestRandPartName(t *testing.T) {
var a bufalloc.ByteAllocator
rng := rand.New(rand.NewSource(uint64(timeutil.Now().UnixNano())))
seen := make(map[string]int)
runOneRound := func() {
namePerm := make([]int, len(randPartNames))
for i := range namePerm {
namePerm[i] = i
}
res := randPartName(rng, namePerm, &a)
names := strings.Split(string(res), " ")
assert.Equal(t, len(names), nPartNames)
seenLocal := make(map[string]int)
for _, name := range names {
if _, ok := seenLocal[name]; ok {
t.Errorf("names in '%s' are not unique", res)
}
seenLocal[name]++
seen[name]++
}
}

// We can't guarantee much about the global distribution of names,
// but we should make sure that we're not always using the same 5
// names. Run up to 100 times before failing.
for i := 0; i < 100; i++ {
if len(seen) > 5 {
return
}
runOneRound()
}

if len(seen) <= 5 {
t.Errorf("only saw 5 names after calling randPartName 100 times")
}
}

0 comments on commit 7c9dd1b

Please sign in to comment.