Skip to content

Commit

Permalink
Merge pull request #75172 from rytaft/backport21.2-74914
Browse files Browse the repository at this point in the history
release-21.2: opt,tree: fix bugs with Next(), Prev(), and histogram calculation for DTimeTZ
  • Loading branch information
rytaft authored Jan 20, 2022
2 parents 3119ca5 + 3d438d0 commit 252bba4
Show file tree
Hide file tree
Showing 8 changed files with 393 additions and 15 deletions.
35 changes: 35 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/timetz
Original file line number Diff line number Diff line change
Expand Up @@ -670,3 +670,38 @@ SELECT a FROM regression_44774 ORDER BY a

statement ok
DROP TABLE regression_44774

# Check that index scans of timetz columns do not miss values.
subtest regression_74912

# Create a table with two identical columns. One is indexed, and one is not.
statement ok
CREATE TABLE regression_74912 (t1 TIMETZ PRIMARY KEY, t2 TIMETZ);
INSERT INTO regression_74912 VALUES
('05:00:00.000001', '05:00:00.000001'),
('07:00:00.000001+02:00:00', '07:00:00.000001+02:00:00'),
('09:00:00.000001+04:00:00', '09:00:00.000001+04:00:00'),
('20:59:00.000001+15:59:00', '20:59:00.000001+15:59:00');

query I
SELECT count(*) FROM regression_74912@{NO_FULL_SCAN} WHERE t1 > '05:00:00';
----
4

query I
SELECT count(*) FROM regression_74912@{NO_FULL_SCAN} WHERE t1 < '05:00:00.000001';
----
3

query I
SELECT count(*) FROM regression_74912 WHERE t2 > '05:00:00';
----
4

query I
SELECT count(*) FROM regression_74912 WHERE t2 < '05:00:00.000001';
----
3

statement ok
DROP TABLE regression_74912
73 changes: 73 additions & 0 deletions pkg/sql/opt/memo/testdata/stats/groupby
Original file line number Diff line number Diff line change
Expand Up @@ -526,3 +526,76 @@ project
│ └── bool_or:5 [type=bool, outer=(5), constraints=(/5: [/true - /true]; tight), fd=()-->(5)]
└── projections
└── 1 [as="?column?":6, type=int]

# Regression test for #74667.
exec-ddl
CREATE TABLE t74667 (col TIMETZ PRIMARY KEY)
----

exec-ddl
ALTER TABLE t74667 INJECT STATISTICS '[
{
"columns": [
"col"
],
"created_at": "2000-01-01 00:00:00+00:00",
"distinct_count": 950814763580487611,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 3873172219268179689,
"num_range": 0,
"upper_bound": "00:00:00+15:59:00"
},
{
"distinct_range": 400000,
"num_eq": 3000000000,
"num_range": 400000,
"upper_bound": "04:40:23.558699+11:08:00"
},
{
"distinct_range": 381143202295070850,
"num_eq": 6399369578112136136,
"num_range": 381143202295070816,
"upper_bound": "06:12:15.740051+06:40:00"
}
],
"histo_col_type": "TIMETZ",
"name": "__auto__",
"null_count": 0,
"row_count": 1188522479222658429
}
]'
----

norm
SELECT count(*)
FROM t74667
WHERE col < '03:33:05.598931+07:11:00':::TIMETZ
GROUP BY col;
----
project
├── columns: count:4(int!null)
├── stats: [rows=2.00509067e+16]
└── group-by
├── columns: col:1(timetz!null) count_rows:4(int!null)
├── grouping columns: col:1(timetz!null)
├── stats: [rows=2.00509067e+16, distinct(1)=2.00509067e+16, null(1)=0]
├── key: (1)
├── fd: (1)-->(4)
├── select
│ ├── columns: col:1(timetz!null)
│ ├── stats: [rows=4.52141047e+17, distinct(1)=2.00509067e+16, null(1)=0]
│ │ histogram(1)= 0 4.3209e+17 44624 3.3468e+08 2.0051e+16 0
│ │ <--- '00:00:00+15:59:00' ------- '04:40:23.558699+11:08:00' ------------ '03:33:06.598931+07:11:01'
│ ├── key: (1)
│ ├── scan t74667
│ │ ├── columns: col:1(timetz!null)
│ │ ├── stats: [rows=1.18852248e+18, distinct(1)=9.50814764e+17, null(1)=0]
│ │ │ histogram(1)= 0 4.3209e+17 44624 3.3468e+08 4.252e+16 7.1391e+17
│ │ │ <--- '00:00:00+15:59:00' ------- '04:40:23.558699+11:08:00' ----------- '06:12:15.740051+06:40:00'
│ │ └── key: (1)
│ └── filters
│ └── col:1 < '03:33:05.598931+07:11:00' [type=bool, outer=(1), constraints=(/1: (/NULL - /'03:33:06.598931+07:11:01']; tight)]
└── aggregations
└── count-rows [as=count_rows:4, type=int]
1 change: 1 addition & 0 deletions pkg/sql/opt/props/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ go_library(
"//pkg/sql/types",
"//pkg/util/encoding",
"//pkg/util/log",
"//pkg/util/timetz",
"@com_github_cockroachdb_errors//:errors",
"@com_github_olekukonko_tablewriter//:tablewriter",
],
Expand Down
17 changes: 14 additions & 3 deletions pkg/sql/opt/props/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"io"
"math"
"sort"
"time"

"github.com/cockroachdb/cockroach/pkg/sql/inverted"
"github.com/cockroachdb/cockroach/pkg/sql/opt"
Expand All @@ -26,6 +27,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/timetz"
"github.com/cockroachdb/errors"
"github.com/olekukonko/tablewriter"
)
Expand Down Expand Up @@ -785,9 +787,18 @@ func getRangesBeforeAndAfter(
return rng, true

case types.TimeTZFamily:
lower := lowerBound.(*tree.DTimeTZ).TimeOfDay
upper := upperBound.(*tree.DTimeTZ).TimeOfDay
rng = float64(upper) - float64(lower)
// timeTZOffsetSecsRange is the total number of possible values for offset.
timeTZOffsetSecsRange := timetz.MaxTimeTZOffsetSecs - timetz.MinTimeTZOffsetSecs + 1

// Find the range in microseconds based on the absolute times of the range
// boundaries.
lower := lowerBound.(*tree.DTimeTZ)
upper := upperBound.(*tree.DTimeTZ)
rng = float64(upper.ToTime().Sub(lower.ToTime()) / time.Microsecond)

// Account for the offset.
rng *= float64(timeTZOffsetSecsRange)
rng += float64(upper.OffsetSecs - lower.OffsetSecs)
return rng, true

default:
Expand Down
70 changes: 61 additions & 9 deletions pkg/sql/opt/props/histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ func TestFilterBucket(t *testing.T) {
t.Fatalf("for span %s expected an error", testCase.span)
}
if !reflect.DeepEqual(testCase.expected, actual) {
t.Fatalf("for span %s exected %v but found %v", testCase.span, testCase.expected, actual)
t.Errorf("for span %s exected %v but found %v", testCase.span, testCase.expected, actual)
}
}
}
Expand Down Expand Up @@ -612,36 +612,88 @@ func TestFilterBucket(t *testing.T) {
})

t.Run("timetz", func(t *testing.T) {
upperBound, _, err := tree.ParseDTimeTZ(&evalCtx, "05:00:00", time.Microsecond)
upperBound1, _, err := tree.ParseDTimeTZ(&evalCtx, "05:00:00", time.Microsecond)
if err != nil {
t.Fatal(err)
}
lowerBound, _, err := tree.ParseDTimeTZ(&evalCtx, "04:00:00", time.Microsecond)
lowerBound1, _, err := tree.ParseDTimeTZ(&evalCtx, "04:00:00", time.Microsecond)
if err != nil {
t.Fatal(err)
}
h := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{
{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(lowerBound)},
{NumEq: 1, NumRange: 62, DistinctRange: 31, UpperBound: upperBound},
h1 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{
{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: getPrevUpperBound(lowerBound1)},
{NumEq: 1, NumRange: 62, DistinctRange: 31, UpperBound: upperBound1},
}}

ub1, _, err := tree.ParseDTimeTZ(&evalCtx, "04:15:00", time.Microsecond)
if err != nil {
t.Fatal(err)
}

testData := []testCase{
testData1 := []testCase{
{
span: "[/04:00:00 - /04:15:00]",
expected: &cat.HistogramBucket{NumEq: 0, NumRange: 15.5, DistinctRange: 7.75, UpperBound: ub1},
},
{
span: "[/04:30:00 - /05:00:00]",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound},
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound1},
},
{
span: "[/06:30:00+02:00:00 - /05:00:00]",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound1},
},
{
span: "[/08:30:00+04:00:00 - /05:00:00]",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 31, DistinctRange: 15.5, UpperBound: upperBound1},
},
}

// This test distinguishes between values that have the same time but
// different offsets.
upperBound2, _, err := tree.ParseDTimeTZ(&evalCtx, "05:00:00.000001", time.Microsecond)
if err != nil {
t.Fatal(err)
}
h2 := &Histogram{evalCtx: &evalCtx, col: col, buckets: []cat.HistogramBucket{
{NumEq: 0, NumRange: 0, DistinctRange: 0, UpperBound: upperBound1},
{NumEq: 1, NumRange: 10000, DistinctRange: 1000, UpperBound: upperBound2},
}}

ub2, _, err := tree.ParseDTimeTZ(&evalCtx, "20:59:00.000001+15:59:00", time.Microsecond)
if err != nil {
t.Fatal(err)
}

testData2 := []testCase{
{
span: "[/05:00:00.000001 - /05:00:00.000001]",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 0, DistinctRange: 0, UpperBound: upperBound2},
},
{
span: "[/07:00:00.000001+02:00:00 - /05:00:00.000001]",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 625.65, DistinctRange: 62.57, UpperBound: upperBound2},
},
{
span: "[/09:00:00.000001+04:00:00 - /05:00:00.000001]",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 1251.3, DistinctRange: 125.13, UpperBound: upperBound2},
},
{
span: "[/04:59:59-00:00:01 - /20:59:00.000001+15:59:00]",
expected: &cat.HistogramBucket{NumEq: 0, NumRange: 5000, DistinctRange: 500, UpperBound: ub2},
},
{
span: "[/20:59:00.000001+15:59:00 - /05:00:00.000001]",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 5000, DistinctRange: 500, UpperBound: upperBound2},
},
{
span: "[/04:59:58-00:00:02 - /05:00:00.000001]",
expected: &cat.HistogramBucket{NumEq: 1, NumRange: 9999.91, DistinctRange: 999.99, UpperBound: upperBound2},
},
}

runTest(h, testData, types.TimeTZFamily)
runTest(h1, testData1, types.TimeTZFamily)
runTest(h2, testData2, types.TimeTZFamily)
})

t.Run("string", func(t *testing.T) {
Expand Down
46 changes: 44 additions & 2 deletions pkg/sql/sem/tree/datum.go
Original file line number Diff line number Diff line change
Expand Up @@ -2177,15 +2177,57 @@ func (d *DTimeTZ) Prev(ctx *EvalContext) (Datum, bool) {
if d.IsMin(ctx) {
return nil, false
}
return NewDTimeTZFromOffset(d.TimeOfDay-1, d.OffsetSecs), true
// In the common case, the absolute time doesn't change, we simply decrement
// the offset by one second and increment the time of day by one second. Once
// we hit the minimum offset for the current absolute time, then we decrement
// the absolute time by one microsecond and wrap around to the highest offset
// for the new absolute time. This aligns with how Before and After are
// defined for TimeTZ.
var newTimeOfDay timeofday.TimeOfDay
var newOffsetSecs int32
if d.OffsetSecs == timetz.MinTimeTZOffsetSecs ||
d.TimeOfDay+duration.MicrosPerSec > timeofday.Max {
newTimeOfDay = d.TimeOfDay - 1
shiftSeconds := int32((newTimeOfDay - timeofday.Min) / duration.MicrosPerSec)
if d.OffsetSecs+shiftSeconds > timetz.MaxTimeTZOffsetSecs {
shiftSeconds = timetz.MaxTimeTZOffsetSecs - d.OffsetSecs
}
newOffsetSecs = d.OffsetSecs + shiftSeconds
newTimeOfDay -= timeofday.TimeOfDay(shiftSeconds) * duration.MicrosPerSec
} else {
newTimeOfDay = d.TimeOfDay + duration.MicrosPerSec
newOffsetSecs = d.OffsetSecs - 1
}
return NewDTimeTZFromOffset(newTimeOfDay, newOffsetSecs), true
}

// Next implements the Datum interface.
func (d *DTimeTZ) Next(ctx *EvalContext) (Datum, bool) {
if d.IsMax(ctx) {
return nil, false
}
return NewDTimeTZFromOffset(d.TimeOfDay+1, d.OffsetSecs), true
// In the common case, the absolute time doesn't change, we simply increment
// the offset by one second and decrement the time of day by one second. Once
// we hit the maximum offset for the current absolute time, then we increment
// the absolute time by one microsecond and wrap around to the lowest offset
// for the new absolute time. This aligns with how Before and After are
// defined for TimeTZ.
var newTimeOfDay timeofday.TimeOfDay
var newOffsetSecs int32
if d.OffsetSecs == timetz.MaxTimeTZOffsetSecs ||
d.TimeOfDay-duration.MicrosPerSec < timeofday.Min {
newTimeOfDay = d.TimeOfDay + 1
shiftSeconds := int32((timeofday.Max - newTimeOfDay) / duration.MicrosPerSec)
if d.OffsetSecs-shiftSeconds < timetz.MinTimeTZOffsetSecs {
shiftSeconds = d.OffsetSecs - timetz.MinTimeTZOffsetSecs
}
newOffsetSecs = d.OffsetSecs - shiftSeconds
newTimeOfDay += timeofday.TimeOfDay(shiftSeconds) * duration.MicrosPerSec
} else {
newTimeOfDay = d.TimeOfDay - duration.MicrosPerSec
newOffsetSecs = d.OffsetSecs + 1
}
return NewDTimeTZFromOffset(newTimeOfDay, newOffsetSecs), true
}

// IsMax implements the Datum interface.
Expand Down
Loading

0 comments on commit 252bba4

Please sign in to comment.