Skip to content

Commit

Permalink
opt/execbuilder: faster maximum ordinal method for colOrdMap
Browse files Browse the repository at this point in the history
This commit makes `colOrdMap.MaxOrd()` a constant-time operation in most
cases. See the newly added comments for more details.

Release note: None
  • Loading branch information
mgartner committed Feb 27, 2024
1 parent be09228 commit 3f4d099
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 20 deletions.
66 changes: 55 additions & 11 deletions pkg/sql/opt/exec/execbuilder/col_ord_map.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,11 @@ type colOrdMap struct {
// 1, which allows the map to store the zero ordinal and have the zero value
// in the map represent an unset column ID.
ords []int32
// maxOrd is the maximum ordinal in the map and is used by MaxOrd. If it is
// 0, then the set is empty. If it is -1, then the current maximum ordinal
// is "unknown" and MaxOrd must scan the entire map to find the maximum
// ordinal. See Set and MaxOrd for more details.
maxOrd int
}

// newColOrdMap returns a new column mapping that can store column IDs less than
Expand All @@ -111,6 +116,20 @@ func (m *colOrdMap) Set(col opt.ColumnID, ord int) {
if ord > math.MaxInt32 {
panic(errors.AssertionFailedf("ordinal %d exceeds max ordinal %d", ord-1, math.MaxInt32-1))
}
switch {
case m.maxIsUnknown():
// If the maximum ordinal is currently unknown, then leave it as-is.
case m.ords[col] > 0 && m.ords[col] == int32(m.maxOrd) && ord < m.maxOrd:
// If we are overriding an ordinal that was previously the maximum
// ordinal with a smaller ordinal, then we don't know what the new
// maximum ordinal is. So we set the maximum ordinal as "unknown". This
// makes MaxOrd scan the map to find the maximum ordinal. See MaxOrd for
// more details.
m.setUnknownMax()
default:
// Otherwise, set the known maximum ordinal.
m.maxOrd = max(m.maxOrd, ord)
}
m.ords[col] = int32(ord)
}

Expand All @@ -128,19 +147,34 @@ func (m colOrdMap) Get(col opt.ColumnID) (ord int, ok bool) {
return ord - 1, true
}

// MaxOrd returns the maximum ordinal stored in the map, or -1 if the map is
// empty.
// MaxOrd returns the maximum ordinal in the map, or -1 if the map is empty.
//
// In most cases, MaxOrd has constant time complexity. If the map had a previous
// maximum ordinal that was overwritten by a smaller ordinal, then MaxOrd has
// linear time complexity with respect to the size of the map. In this case, the
// result is memoized, so future calls to MaxOrd are constant time until the
// maximum ordinal is overwritten by a smaller ordinal again.
//
// The case with linear time complexity should be rare in practice. The benefit
// of this approach is that it allows for constant time complexity in most cases
// with only constant additional space. Making MaxOrd constant-time in all cases
// would require non-constant additional space to keep a history of all previous
// maximum ordinals.
func (m colOrdMap) MaxOrd() int {
maxOrd := -1
for _, ord := range m.ords {
if ord == 0 {
continue
}
// If the maximum ordinal is known, return it.
if !m.maxIsUnknown() {
// Reverse the bias when fetching the max ordinal from the map.
ord--
maxOrd = max(maxOrd, int(ord))
return m.maxOrd - 1
}
return maxOrd
// Otherwise, maxOrd is negative, meaning that a previous maximum ordinal
// was overwritten by a smaller ordinal. So we have to search for the
// maximum ordinal in the set.
m.maxOrd = 0
for _, ord := range m.ords {
m.maxOrd = max(m.maxOrd, int(ord))
}
// Reverse the bias when fetching the max ordinal from the map.
return m.maxOrd - 1
}

// ForEach calls the given function for each column ID and ordinal pair in the
Expand All @@ -163,11 +197,21 @@ func (m *colOrdMap) CopyFrom(other colOrdMap) {
len(m.ords), len(other.ords)))
}
copy(m.ords, other.ords)
m.maxOrd = other.maxOrd
}

// Clear clears the map. The allocated memory is retained for future reuse.
func (m colOrdMap) Clear() {
func (m *colOrdMap) Clear() {
for i := range m.ords {
m.ords[i] = 0
}
m.maxOrd = 0
}

func (m colOrdMap) maxIsUnknown() bool {
return m.maxOrd == -1
}

func (m *colOrdMap) setUnknownMax() {
m.maxOrd = -1
}
41 changes: 35 additions & 6 deletions pkg/sql/opt/exec/execbuilder/col_ord_map_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
package execbuilder

import (
"math"
"testing"

"github.com/cockroachdb/cockroach/pkg/sql/opt"
Expand All @@ -24,17 +25,12 @@ func TestColOrdMap(t *testing.T) {
const maxCol = 100
m := newColOrdMap(maxCol)
oracle := make(map[opt.ColumnID]int)

if m.MaxOrd() != -1 {
t.Errorf("expected empty map to have MaxOrd of -1, got %d", m.MaxOrd())
}

rng, _ := randutil.NewTestRand()

const numOps = 1000
for i := 0; i < numOps; i++ {
col := opt.ColumnID(rng.Intn(maxCol + 1))
ord := int(rng.Int31())
ord := rng.Intn(math.MaxInt32)

oracle[col] = ord
m.Set(col, ord)
Expand Down Expand Up @@ -77,3 +73,36 @@ func validate(t *testing.T, m colOrdMap, oracle map[opt.ColumnID]int) {
}
})
}

func TestMaxOrd(t *testing.T) {
defer leaktest.AfterTest(t)()

const maxCol = 100
m := newColOrdMap(maxCol)

assertMax := func(expectedMax int) {
maxOrd := m.MaxOrd()
if maxOrd != expectedMax {
t.Errorf("expected empty map to have MaxOrd of %d, got %d", expectedMax, maxOrd)
}
}

// An empty map has a max ordinal of -1.
assertMax(-1)

m.Set(1, 2)
assertMax(2)

m.Set(1, 3)
m.Set(2, 3)
assertMax(3)

m.Set(1, 1)
assertMax(3)

m.Set(2, 1)
assertMax(1)

m.Set(1, 0)
assertMax(1)
}
3 changes: 0 additions & 3 deletions pkg/sql/opt/exec/execbuilder/relational.go
Original file line number Diff line number Diff line change
Expand Up @@ -2885,9 +2885,6 @@ func (b *Builder) buildInvertedJoin(
return execPlan{}, colOrdMap{}, errors.AssertionFailedf("outputCols should not be empty")
}
// Assign the continuation column the next unused value in the map.
// TODO(mgartner): It's currently safe to use maxOrd like this, but it
// is not robust because it's not guaranteed to be the max ordinal, only
// an approximate upper-bound.
outputCols.Set(join.ContinuationCol, maxOrd+1)

// leftAndRightCols is only needed for the lifetime of the function, so free
Expand Down

0 comments on commit 3f4d099

Please sign in to comment.