diff --git a/pkg/storage/cmdq/interval_btree.go b/pkg/storage/cmdq/interval_btree.go
new file mode 100644
index 000000000000..0f144948eb0a
--- /dev/null
+++ b/pkg/storage/cmdq/interval_btree.go
@@ -0,0 +1,1013 @@
+// Copyright 2018 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package cmdq
+
+import (
+	"bytes"
+	"sort"
+	"strings"
+	"unsafe"
+
+	"github.com/cockroachdb/cockroach/pkg/roachpb"
+)
+
+// TODO(nvanbenschoten):
+// 2. Add synchronized node and leafNode freelists
+// 3. Introduce immutability and a copy-on-write policy:
+// 4. Describe pedigree, changes, etc. of this implementation
+
+// Sizing parameters for btree nodes.
+const (
+	degree  = 16           // base parameter the two cmd limits below derive from
+	maxCmds = 2*degree - 1 // capacity of a node's cmds array; fuller nodes are split
+	minCmds = degree - 1   // nodes at or below this are grown before a removal descends
+)
+
+// cmd is the element stored in the btree: a key span tagged with an id that
+// distinguishes cmds whose spans are identical (see cmp).
+//
+// TODO(nvanbenschoten): remove.
+type cmd struct {
+	id   int64
+	span roachpb.Span
+}
+
+// cmp orders two cmds by comparing, in sequence, their start keys, their end
+// keys and finally their ids. Keys are compared byte-wise.
+//
+// The result is
+//
+//  -1  if a sorts before b,
+//   0  if a and b agree on (span.Key, span.EndKey, id),
+//   1  if a sorts after b.
+//
+func cmp(a, b *cmd) int {
+	if byKey := bytes.Compare(a.span.Key, b.span.Key); byKey != 0 {
+		return byKey
+	}
+	if byEnd := bytes.Compare(a.span.EndKey, b.span.EndKey); byEnd != 0 {
+		return byEnd
+	}
+	// Spans are identical; fall back to the id as a tie-breaker.
+	switch {
+	case a.id < b.id:
+		return -1
+	case a.id > b.id:
+		return 1
+	}
+	return 0
+}
+
+// keyBound represents the upper-bound of a key range.
+type keyBound struct {
+	key roachpb.Key // the bounding key
+	inc bool        // whether key itself lies within the bound (inclusive)
+}
+
+// compare orders two bounds: first by key and, for equal keys, an exclusive
+// bound sorts before an inclusive one. It returns a negative value, 0, or a
+// positive value accordingly.
+func (b keyBound) compare(o keyBound) int {
+	if byKey := bytes.Compare(b.key, o.key); byKey != 0 {
+		return byKey
+	}
+	switch {
+	case b.inc == o.inc:
+		return 0
+	case b.inc:
+		return 1
+	default:
+		return -1
+	}
+}
+
+// contains reports whether the start key of a falls within the bound: either
+// strictly below b.key, or equal to it when the bound is inclusive.
+func (b keyBound) contains(a *cmd) bool {
+	order := bytes.Compare(a.span.Key, b.key)
+	return order < 0 || (order == 0 && b.inc)
+}
+
+// upperBound returns the upper bound of c's span: the EndKey taken
+// exclusively when one is set, otherwise the span's Key taken inclusively.
+func upperBound(c *cmd) keyBound {
+	if len(c.span.EndKey) == 0 {
+		return keyBound{key: c.span.Key, inc: true}
+	}
+	return keyBound{key: c.span.EndKey}
+}
+
+// leafNode holds the state shared by leaf and interior nodes. Leaves are
+// allocated as a bare leafNode (see newLeafNode) and so carry no children
+// array.
+type leafNode struct {
+	max   keyBound      // upper bound recorded for the node; see findUpperBound
+	count int16         // number of occupied slots in cmds
+	leaf  bool          // true when the node has no children
+	cmds  [maxCmds]*cmd // cmds[:count] are kept in cmp order
+}
+
+// newLeafNode allocates a leafNode with its leaf flag set and returns it
+// viewed as a *node. Only the leafNode portion is allocated, so the children
+// field of the returned node must not be accessed.
+func newLeafNode() *node {
+	return (*node)(unsafe.Pointer(&leafNode{leaf: true}))
+}
+
+// node is a btree node. Interior nodes are allocated as a full node; leaves
+// are allocated as a leafNode and only reinterpreted as a node, which relies
+// on leafNode being the first (embedded) field.
+type node struct {
+	leafNode
+	children [maxCmds + 1]*node // children[:count+1] are in use on interior nodes
+}
+
+// insertAt places c at position index of n.cmds, shifting the cmds at index
+// and beyond one slot to the right. On interior nodes nd is installed as the
+// child immediately to the right of c (children[index+1]) and the children
+// after it are shifted as well. The node must have room for one more cmd.
+func (n *node) insertAt(index int, c *cmd, nd *node) {
+	if index < int(n.count) {
+		// Open a gap at index; nothing to shift when appending at the end.
+		copy(n.cmds[index+1:n.count+1], n.cmds[index:n.count])
+		if !n.leaf {
+			copy(n.children[index+2:n.count+2], n.children[index+1:n.count+1])
+		}
+	}
+	n.cmds[index] = c
+	if !n.leaf {
+		n.children[index+1] = nd
+	}
+	n.count++
+}
+
+// pushBack appends c after the node's last cmd. On interior nodes nd becomes
+// the new right-most child.
+func (n *node) pushBack(c *cmd, nd *node) {
+	end := n.count
+	n.cmds[end] = c
+	if !n.leaf {
+		n.children[end+1] = nd
+	}
+	n.count = end + 1
+}
+
+// pushFront inserts c ahead of the node's first cmd, shifting every existing
+// cmd one slot to the right. On interior nodes nd becomes the new left-most
+// child and the existing children are shifted likewise.
+func (n *node) pushFront(c *cmd, nd *node) {
+	cnt := n.count
+	copy(n.cmds[1:cnt+1], n.cmds[:cnt])
+	n.cmds[0] = c
+	if !n.leaf {
+		copy(n.children[1:cnt+2], n.children[:cnt+1])
+		n.children[0] = nd
+	}
+	n.count = cnt + 1
+}
+
+// removeAt removes the cmd at the given index, pulling all subsequent cmds
+// back by one slot. On interior nodes the child to the right of that cmd
+// (children[index+1]) is removed and returned as well; on leaves the returned
+// node is nil.
+func (n *node) removeAt(index int) (*cmd, *node) {
+	var child *node
+	if !n.leaf {
+		child = n.children[index+1]
+		copy(n.children[index+1:n.count], n.children[index+2:n.count+1])
+		n.children[n.count] = nil // clear the vacated last child slot
+	}
+	n.count--
+	out := n.cmds[index]
+	copy(n.cmds[index:n.count], n.cmds[index+1:n.count+1])
+	n.cmds[n.count] = nil // clear the vacated last cmd slot
+	return out, child
+}
+
+// popBack removes and returns the last element in the list.
+func (n *node) popBack() (*cmd, *node) {
+	n.count--
+	out := n.cmds[n.count]
+	n.cmds[n.count] = nil
+	if n.leaf {
+		return out, nil
+	}
+	child := n.children[n.count+1]
+	n.children[n.count+1] = nil
+	return out, child
+}
+
+// popFront removes and returns the node's first cmd, shifting the remaining
+// cmds one slot to the left. On interior nodes the left-most child is removed
+// and returned as well; on leaves the returned node is nil.
+func (n *node) popFront() (*cmd, *node) {
+	n.count--
+	var child *node
+	if !n.leaf {
+		child = n.children[0]
+		copy(n.children[:n.count+1], n.children[1:n.count+2])
+		n.children[n.count+1] = nil // clear the vacated last child slot
+	}
+	out := n.cmds[0]
+	copy(n.cmds[:n.count], n.cmds[1:n.count+1])
+	n.cmds[n.count] = nil // clear the vacated last cmd slot
+	return out, child
+}
+
+// find binary-searches the node's cmds for c. When an equal cmd is present,
+// its index is returned with found set to true. Otherwise index is the
+// position at which c would have to be inserted to keep the cmds sorted.
+func (n *node) find(c *cmd) (index int, found bool) {
+	// Hand-rolled variant of sort.Search. Inlining this gave
+	// an 11% speedup on BenchmarkBTreeDeleteInsert.
+	lo, hi := 0, int(n.count)
+	for lo < hi {
+		mid := int(uint(lo+hi) >> 1) // avoid overflow when computing mid
+		// lo ≤ mid < hi
+		switch order := cmp(c, n.cmds[mid]); {
+		case order == 0:
+			return mid, true
+		case order > 0:
+			lo = mid + 1
+		default:
+			hi = mid
+		}
+	}
+	return lo, false
+}
+
+// split splits the given node at the given index. The current node shrinks,
+// and this function returns the cmd that existed at that index and a new node
+// containing all cmds/children after it.
+//
+// The new node's upper bound is computed from scratch. The current node's
+// upper bound is recomputed only when it matched the bound of the new node or
+// of the returned cmd.
+//
+// Before:
+//
+//          +-----------+
+//          |   x y z   |
+//          +--/-/-\-\--+
+//
+// After:
+//
+//          +-----------+
+//          |     y     |
+//          +----/-\----+
+//              /   \
+//             v     v
+// +-----------+     +-----------+
+// |         x |     | z         |
+// +-----------+     +-----------+
+//
+func (n *node) split(i int) (*cmd, *node) {
+	out := n.cmds[i]
+	var next *node
+	// The new sibling is of the same kind (leaf or interior) as n.
+	if n.leaf {
+		next = newLeafNode()
+	} else {
+		next = &node{}
+	}
+	next.count = n.count - int16(i+1)
+	copy(next.cmds[:], n.cmds[i+1:n.count])
+	// Nil out the slots that were handed off, including the one at index i.
+	for j := int16(i); j < n.count; j++ {
+		n.cmds[j] = nil
+	}
+	if !n.leaf {
+		copy(next.children[:], n.children[i+1:n.count+1])
+		for j := int16(i + 1); j <= n.count; j++ {
+			n.children[j] = nil
+		}
+	}
+	n.count = int16(i)
+
+	next.max = next.findUpperBound()
+	if n.max.compare(next.max) != 0 && n.max.compare(upperBound(out)) != 0 {
+		// If upper bound wasn't from new node or cmd
+		// at index i, it must still be from old node.
+	} else {
+		n.max = n.findUpperBound()
+	}
+	return out, next
+}
+
+// insert adds c to the subtree rooted at this node, splitting any full child
+// on the way down so that no node in the subtree ends up with more than
+// maxCmds cmds. replaced is true when an equal cmd was already present and
+// has been overwritten, false when c was newly inserted. newBound reports
+// whether this node's upper bound changed.
+func (n *node) insert(c *cmd) (replaced, newBound bool) {
+	pos, exists := n.find(c)
+	if exists {
+		// An equal cmd has an identical span, so the bound is unaffected.
+		n.cmds[pos] = c
+		return true, false
+	}
+	if n.leaf {
+		n.insertAt(pos, c, nil)
+		return false, n.adjustUpperBoundOnInsertion(c, nil)
+	}
+	if n.children[pos].count >= maxCmds {
+		// The target child is full: split it and hoist its median cmd into
+		// this node before descending.
+		median, right := n.children[pos].split(maxCmds / 2)
+		n.insertAt(pos, median, right)
+
+		order := cmp(c, n.cmds[pos])
+		if order == 0 {
+			n.cmds[pos] = c
+			return true, false
+		}
+		if order > 0 {
+			pos++ // we want second split node
+		}
+		// Otherwise no change, we want first split node.
+	}
+	replaced, newBound = n.children[pos].insert(c)
+	if newBound {
+		newBound = n.adjustUpperBoundOnInsertion(c, nil)
+	}
+	return replaced, newBound
+}
+
+// removeMax removes and returns the maximum cmd from the subtree rooted at
+// this node.
+//
+// The upper bound of every node on the path to the removed cmd is adjusted,
+// interior nodes included: the cmd leaves each of those subtrees, so any of
+// their max values may have been derived from it.
+func (n *node) removeMax() *cmd {
+	if n.leaf {
+		n.count--
+		out := n.cmds[n.count]
+		n.cmds[n.count] = nil
+		n.adjustUpperBoundOnRemoval(out, nil)
+		return out
+	}
+	child := n.children[n.count]
+	if child.count <= minCmds {
+		// Child not large enough to remove from.
+		n.rebalanceOrMerge(int(n.count))
+		return n.removeMax() // redo: the right-most child may have changed
+	}
+	out := child.removeMax()
+	// Without this, an interior node would keep advertising a bound that
+	// came from a cmd no longer stored beneath it.
+	n.adjustUpperBoundOnRemoval(out, nil)
+	return out
+}
+
+// remove removes a cmd from the subtree rooted at this node. Returns
+// the cmd that was removed or nil if no matching command was found.
+// Also returns whether the node's upper bound changes.
+//
+// Before descending into a child, that child is grown (by rebalancing or
+// merging) if it holds minCmds cmds or fewer, and the removal is retried on
+// this node.
+func (n *node) remove(c *cmd) (out *cmd, newBound bool) {
+	i, found := n.find(c)
+	if n.leaf {
+		if found {
+			out, _ = n.removeAt(i)
+			return out, n.adjustUpperBoundOnRemoval(out, nil)
+		}
+		return nil, false
+	}
+	child := n.children[i]
+	if child.count <= minCmds {
+		// Child not large enough to remove from.
+		n.rebalanceOrMerge(i)
+		// Redo the search: the cmds and children of n may have moved.
+		return n.remove(c)
+	}
+	if found {
+		// Replace the cmd being removed with the max cmd in our left child.
+		out = n.cmds[i]
+		n.cmds[i] = child.removeMax()
+		return out, n.adjustUpperBoundOnRemoval(out, nil)
+	}
+	// Cmd is not in this node and child is large enough to remove from.
+	out, newBound = child.remove(c)
+	if newBound {
+		// Only re-examine our own bound when the child's bound moved.
+		newBound = n.adjustUpperBoundOnRemoval(out, nil)
+	}
+	return out, newBound
+}
+
+// rebalanceOrMerge grows child 'i' to ensure it has sufficient room to remove
+// a cmd from it while keeping it at or above minCmds.
+//
+// A cmd is borrowed through this node from the left sibling if that sibling
+// has more than minCmds cmds, else from the right sibling under the same
+// condition. If neither sibling can spare one, the child is merged with a
+// sibling, pulling the separating cmd down from this node. The upper bounds
+// of the children that were modified are adjusted; this node's own bound is
+// not touched here.
+func (n *node) rebalanceOrMerge(i int) {
+	switch {
+	case i > 0 && n.children[i-1].count > minCmds:
+		// Rebalance from left sibling.
+		//
+		// Before:
+		//
+		//          +-----------+
+		//          |     y     |
+		//          +----/-\----+
+		//              /   \
+		//             v     v
+		// +-----------+     +-----------+
+		// |         x |     |           |
+		// +----------\+     +-----------+
+		//             \
+		//              v
+		//              a
+		//
+		// After:
+		//
+		//          +-----------+
+		//          |     x     |
+		//          +----/-\----+
+		//              /   \
+		//             v     v
+		// +-----------+     +-----------+
+		// |           |     | y         |
+		// +-----------+     +/----------+
+		//                   /
+		//                  v
+		//                  a
+		//
+		left := n.children[i-1]
+		child := n.children[i]
+		xCmd, grandChild := left.popBack()
+		yCmd := n.cmds[i-1]
+		child.pushFront(yCmd, grandChild)
+		n.cmds[i-1] = xCmd
+
+		// left lost x (and a); child gained y (and a).
+		left.adjustUpperBoundOnRemoval(xCmd, grandChild)
+		child.adjustUpperBoundOnInsertion(yCmd, grandChild)
+
+	case i < int(n.count) && n.children[i+1].count > minCmds:
+		// Rebalance from right sibling.
+		//
+		// Before:
+		//
+		//          +-----------+
+		//          |     y     |
+		//          +----/-\----+
+		//              /   \
+		//             v     v
+		// +-----------+     +-----------+
+		// |           |     | x         |
+		// +-----------+     +/----------+
+		//                   /
+		//                  v
+		//                  a
+		//
+		// After:
+		//
+		//          +-----------+
+		//          |     x     |
+		//          +----/-\----+
+		//              /   \
+		//             v     v
+		// +-----------+     +-----------+
+		// |         y |     |           |
+		// +----------\+     +-----------+
+		//             \
+		//              v
+		//              a
+		//
+		right := n.children[i+1]
+		child := n.children[i]
+		xCmd, grandChild := right.popFront()
+		yCmd := n.cmds[i]
+		child.pushBack(yCmd, grandChild)
+		n.cmds[i] = xCmd
+
+		// right lost x (and a); child gained y (and a).
+		right.adjustUpperBoundOnRemoval(xCmd, grandChild)
+		child.adjustUpperBoundOnInsertion(yCmd, grandChild)
+
+	default:
+		// Merge with either the left or right sibling.
+		//
+		// Before:
+		//
+		//          +-----------+
+		//          |   u y v   |
+		//          +----/-\----+
+		//              /   \
+		//             v     v
+		// +-----------+     +-----------+
+		// |         x |     | z         |
+		// +-----------+     +-----------+
+		//
+		// After:
+		//
+		//          +-----------+
+		//          |    u v    |
+		//          +-----|-----+
+		//                |
+		//                v
+		//          +-----------+
+		//          |   x y z   |
+		//          +-----------+
+		//
+		if i >= int(n.count) {
+			// The right-most child has no right sibling; merge it into its
+			// left sibling instead.
+			i = int(n.count - 1)
+		}
+		child := n.children[i]
+		// Pull the separating cmd and the right-hand node out of n, then
+		// append both to child.
+		mergeCmd, mergeChild := n.removeAt(i)
+		child.cmds[child.count] = mergeCmd
+		copy(child.cmds[child.count+1:], mergeChild.cmds[:mergeChild.count])
+		if !child.leaf {
+			copy(child.children[child.count+1:], mergeChild.children[:mergeChild.count+1])
+		}
+		child.count += mergeChild.count + 1
+
+		child.adjustUpperBoundOnInsertion(mergeCmd, mergeChild)
+	}
+}
+
+// findUpperBound computes the node's upper bound from scratch: the largest
+// upper bound among the node's own cmds and, on interior nodes, the max
+// recorded on each child. Children are assumed to already carry correct
+// upper bounds.
+func (n *node) findUpperBound() keyBound {
+	var best keyBound
+	for _, c := range n.cmds[:n.count] {
+		if up := upperBound(c); best.compare(up) < 0 {
+			best = up
+		}
+	}
+	if n.leaf {
+		return best
+	}
+	for _, child := range n.children[:n.count+1] {
+		if best.compare(child.max) < 0 {
+			best = child.max
+		}
+	}
+	return best
+}
+
+// adjustUpperBoundOnInsertion raises the node's upper bound, if needed, so
+// that it covers a cmd and an optional child node that were just inserted.
+// Returns true if the upper bound was changed and false if not.
+func (n *node) adjustUpperBoundOnInsertion(c *cmd, child *node) bool {
+	added := upperBound(c)
+	if child != nil && added.compare(child.max) < 0 {
+		added = child.max
+	}
+	if n.max.compare(added) >= 0 {
+		// The existing bound already covers what was inserted.
+		return false
+	}
+	n.max = added
+	return true
+}
+
+// adjustUpperBoundOnRemoval revisits the node's upper bound after a cmd and
+// an optional child node were removed from it. The bound is recomputed only
+// when it equals the bound contributed by what was removed. Returns true if
+// the bound was recomputed and false if it was left alone.
+func (n *node) adjustUpperBoundOnRemoval(c *cmd, child *node) bool {
+	removed := upperBound(c)
+	if child != nil && removed.compare(child.max) < 0 {
+		removed = child.max
+	}
+	if n.max.compare(removed) != 0 {
+		return false
+	}
+	n.max = n.findUpperBound()
+	return true
+}
+
+// btree is an implementation of an augmented interval B-Tree.
+//
+// btree stores cmds in an ordered structure, allowing easy insertion,
+// removal, and iteration. It represents intervals and permits an interval
+// search operation following the approach laid out in CLRS, Chapter 14.
+// The B-Tree stores cmds in order based on their start key and each B-Tree
+// node maintains the upper-bound end key of all cmds in its subtree.
+//
+// Write operations are not safe for concurrent mutation by multiple
+// goroutines, but Read operations are.
+type btree struct {
+	root   *node // nil until the first Set and after Reset
+	length int   // number of cmds in the tree; reported by Len
+}
+
+// Reset empties the btree by dropping its root and zeroing its length.
+func (t *btree) Reset() {
+	t.root, t.length = nil, 0
+}
+
+// Silence the unused warning for Reset.
+var _ = (*btree).Reset
+
+// Delete removes a cmd equal to the passed in cmd from the tree, if there is
+// one. When the removal leaves an interior root without any cmds, the root's
+// single remaining child becomes the new root.
+func (t *btree) Delete(c *cmd) {
+	root := t.root
+	if root == nil || root.count == 0 {
+		return
+	}
+	if removed, _ := root.remove(c); removed != nil {
+		t.length--
+	}
+	if !root.leaf && root.count == 0 {
+		t.root = root.children[0]
+	}
+}
+
+// Set adds the given cmd to the tree. If a cmd in the tree already equals
+// the given one, it is replaced with the new cmd and the length is unchanged.
+func (t *btree) Set(c *cmd) {
+	switch {
+	case t.root == nil:
+		t.root = newLeafNode()
+	case t.root.count >= maxCmds:
+		// The root is full: split it and grow the tree by one level, with
+		// the median cmd as the only cmd of the new root.
+		median, right := t.root.split(maxCmds / 2)
+		grown := &node{}
+		grown.count = 1
+		grown.cmds[0] = median
+		grown.children[0] = t.root
+		grown.children[1] = right
+		grown.max = grown.findUpperBound()
+		t.root = grown
+	}
+	replaced, _ := t.root.insert(c)
+	if !replaced {
+		t.length++
+	}
+}
+
+// MakeIter returns a new iterator object. It is not safe to continue using an
+// iterator after modifications are made to the tree. If modifications are made,
+// create a new iterator.
+func (t *btree) MakeIter() iterator {
+	return iterator{r: t.root, pos: -1}
+}
+
+// Height returns the number of levels in the tree, counted by following the
+// left-most child from the root down to a leaf. An empty tree has height 0.
+func (t *btree) Height() int {
+	if t.root == nil {
+		return 0
+	}
+	levels := 1
+	for cur := t.root; !cur.leaf; cur = cur.children[0] {
+		levels++
+	}
+	return levels
+}
+
+// Len returns the number of cmds currently in the tree, as tracked by Set
+// and Delete.
+func (t *btree) Len() int {
+	return t.length
+}
+
+// String returns a string description of the tree. The format is
+// similar to the https://en.wikipedia.org/wiki/Newick_format.
+// An empty tree is rendered as ";".
+func (t *btree) String() string {
+	if t.length != 0 {
+		var sb strings.Builder
+		t.root.writeString(&sb)
+		return sb.String()
+	}
+	return ";"
+}
+
+// writeString appends a description of the subtree rooted at n to b. A leaf
+// is written as its spans separated by commas. An interior node is written as
+// each child wrapped in parentheses, with the span of the separating cmd
+// following every child but the last.
+func (n *node) writeString(b *strings.Builder) {
+	if !n.leaf {
+		for i, child := range n.children[:n.count+1] {
+			b.WriteString("(")
+			child.writeString(b)
+			b.WriteString(")")
+			if i < int(n.count) {
+				b.WriteString(n.cmds[i].span.String())
+			}
+		}
+		return
+	}
+	for i, c := range n.cmds[:n.count] {
+		if i > 0 {
+			b.WriteString(",")
+		}
+		b.WriteString(c.span.String())
+	}
+}
+
+// iterStack represents a stack of (node, pos) tuples, which captures
+// iteration state as an iterator descends a btree.
+//
+// Frames are kept in the inline array a (a[:aLen]) until it is full. The push
+// that overflows it copies the frames into the slice s and sets aLen to -1,
+// after which only s is used.
+type iterStack struct {
+	a    iterStackArr
+	aLen int16 // -1 when using s
+	s    []iterFrame
+}
+
+// iterStackArr is the inline frame storage of an iterStack.
+// Used to avoid allocations for stacks below a certain size.
+type iterStackArr [3]iterFrame
+
+// iterFrame is a single iterStack entry: a node and a position within it.
+type iterFrame struct {
+	n   *node
+	pos int16
+}
+
+// push adds f to the top of the stack. Frames go into the inline array while
+// it has room; the push that finds the array full moves everything into a
+// newly allocated slice, which is used from then on.
+func (is *iterStack) push(f iterFrame) {
+	switch {
+	case is.aLen == -1:
+		// Already spilled to the slice.
+		is.s = append(is.s, f)
+	case int(is.aLen) == len(is.a):
+		// Inline array is full: switch over to the slice.
+		is.s = make([]iterFrame, int(is.aLen)+1, 2*int(is.aLen))
+		copy(is.s, is.a[:])
+		is.s[int(is.aLen)] = f
+		is.aLen = -1
+	default:
+		is.a[is.aLen] = f
+		is.aLen++
+	}
+}
+
+// pop removes and returns the frame on top of the stack. The stack must not
+// be empty.
+func (is *iterStack) pop() iterFrame {
+	if is.aLen != -1 {
+		is.aLen--
+		return is.a[is.aLen]
+	}
+	top := len(is.s) - 1
+	f := is.s[top]
+	is.s = is.s[:top]
+	return f
+}
+
+// len returns the number of frames currently on the stack.
+func (is *iterStack) len() int {
+	if is.aLen != -1 {
+		return int(is.aLen)
+	}
+	return len(is.s)
+}
+
+// reset empties the stack while staying in whichever storage mode (inline
+// array or slice) it is currently in; a slice keeps its capacity.
+func (is *iterStack) reset() {
+	if is.aLen != -1 {
+		is.aLen = 0
+		return
+	}
+	is.s = is.s[:0]
+}
+
+// iterator is responsible for search and traversal within a btree.
+//
+// r is the root the iterator was created from, n and pos identify the
+// current node and the position within it, s holds the (node, pos) frames
+// of the ancestors that were descended through, and o carries the state of
+// an overlap scan.
+type iterator struct {
+	r   *node
+	n   *node
+	pos int16
+	s   iterStack
+	o   overlapScan
+}
+
+// reset returns the iterator to the root in an unpositioned state (pos -1),
+// emptying the ancestor stack and discarding any overlap scan state.
+func (i *iterator) reset() {
+	i.n, i.pos = i.r, -1
+	i.s.reset()
+	i.o = overlapScan{}
+}
+
+// descend records (n, pos) on the ancestor stack and moves the iterator to
+// position 0 of the child of n at pos.
+func (i *iterator) descend(n *node, pos int16) {
+	frame := iterFrame{n: n, pos: pos}
+	i.s.push(frame)
+	i.n, i.pos = n.children[pos], 0
+}
+
+// ascend moves the iterator back to the parent of the current node and
+// restores the position that was recorded for that parent on descent.
+func (i *iterator) ascend() {
+	parent := i.s.pop()
+	i.n, i.pos = parent.n, parent.pos
+}
+
+// SeekGE seeks to the first cmd greater-than or equal to the provided cmd.
+// If no such cmd exists the iterator is left invalid.
+func (i *iterator) SeekGE(c *cmd) {
+	i.reset()
+	if i.n == nil {
+		return
+	}
+	for {
+		idx, exact := i.n.find(c)
+		i.pos = int16(idx)
+		switch {
+		case exact:
+			return
+		case !i.n.leaf:
+			i.descend(i.n, i.pos)
+		default:
+			// Reached a leaf. If every cmd in it sorts before c, step
+			// forward to the next cmd in the tree.
+			if i.pos == i.n.count {
+				i.Next()
+			}
+			return
+		}
+	}
+}
+
+// SeekLT seeks to the cmd immediately preceding the provided cmd in sort
+// order, i.e. the largest cmd that is less-than it. If no such cmd exists
+// the iterator is left invalid.
+func (i *iterator) SeekLT(c *cmd) {
+	i.reset()
+	if i.n == nil {
+		return
+	}
+	for {
+		idx, exact := i.n.find(c)
+		i.pos = int16(idx)
+		if !exact && !i.n.leaf {
+			i.descend(i.n, i.pos)
+			continue
+		}
+		// Positioned at c or at its insertion point: step back once.
+		i.Prev()
+		return
+	}
+}
+
+// First seeks to the first cmd in the btree by following the left-most
+// child at every level.
+func (i *iterator) First() {
+	i.reset()
+	if i.n == nil {
+		return
+	}
+	for {
+		if i.n.leaf {
+			i.pos = 0
+			return
+		}
+		i.descend(i.n, 0)
+	}
+}
+
+// Last seeks to the last cmd in the btree by following the right-most
+// child at every level.
+func (i *iterator) Last() {
+	i.reset()
+	if i.n == nil {
+		return
+	}
+	for {
+		if i.n.leaf {
+			i.pos = i.n.count - 1
+			return
+		}
+		i.descend(i.n, i.n.count)
+	}
+}
+
+// Next positions the iterator to the cmd immediately following
+// its current position.
+func (i *iterator) Next() {
+	if i.n == nil {
+		return
+	}
+
+	if !i.n.leaf {
+		// On an interior cmd: the successor is the left-most cmd of the
+		// subtree to its right.
+		i.descend(i.n, i.pos+1)
+		for !i.n.leaf {
+			i.descend(i.n, 0)
+		}
+		i.pos = 0
+		return
+	}
+
+	i.pos++
+	// Once a leaf is exhausted, climb until an ancestor still has a cmd at
+	// its recorded position, or until the root has been passed.
+	for i.pos >= i.n.count && i.s.len() > 0 {
+		i.ascend()
+	}
+}
+
+// Prev positions the iterator to the cmd immediately preceding
+// its current position.
+func (i *iterator) Prev() {
+	if i.n == nil {
+		return
+	}
+
+	if !i.n.leaf {
+		// On an interior cmd: the predecessor is the right-most cmd of the
+		// subtree to its left.
+		i.descend(i.n, i.pos)
+		for !i.n.leaf {
+			i.descend(i.n, i.n.count)
+		}
+		i.pos = i.n.count - 1
+		return
+	}
+
+	i.pos--
+	// Once a leaf is exhausted, climb and step to the cmd left of the child
+	// just left, repeating while that position does not exist either.
+	for i.pos < 0 && i.s.len() > 0 {
+		i.ascend()
+		i.pos--
+	}
+}
+
+// Valid returns whether the iterator is positioned at a valid position, i.e.
+// whether pos indexes a cmd of the current node. The current node is only
+// consulted when pos is non-negative.
+func (i *iterator) Valid() bool {
+	return !(i.pos < 0 || i.pos >= i.n.count)
+}
+
+// Cmd returns the cmd at the iterator's current position. It is illegal
+// to call Cmd if the iterator is not valid (see Valid).
+func (i *iterator) Cmd() *cmd {
+	return i.n.cmds[i.pos]
+}
+
+// An overlap scan is a scan over all cmds that overlap with the provided cmd
+// in order of the overlapping cmds' start keys. The goal of the scan is to
+// minimize the number of key comparisons performed in total. The algorithm
+// operates based on the following two invariants maintained by the augmented
+// interval btree:
+// 1. all cmds are sorted in the btree based on their start key.
+// 2. all btree nodes maintain the upper bound end key of all cmds
+//    in their subtree.
+//
+// The scan algorithm starts in "unconstrained minimum" and "unconstrained
+// maximum" states. To enter a "constrained minimum" state, the scan must reach
+// cmds in the tree with start keys above the search range's start key. Because
+// cmds in the tree are sorted by start key, once the scan enters the
+// "constrained minimum" state it will remain there. To enter a "constrained
+// maximum" state, the scan must determine the first child btree node in a given
+// subtree that can have cmds with start keys above the search range's end key.
+// The scan then remains in the "constrained maximum" state until it traverses
+// into this child node, at which point it moves to the "unconstrained maximum"
+// state again.
+//
+// The scan algorithm works like a standard btree forward scan with the
+// following augmentations:
+// 1. before traversing the tree, the scan performs a binary search on the
+//    root node's items to determine a "soft" lower-bound constraint position
+//    and a "hard" upper-bound constraint position in the root's children.
+// 2. when traversing into a child node in the lower or upper bound constraint
+//    position, the constraint is refined by searching the child's items.
+// 3. the initial traversal down the tree follows the left-most children
+//    whose upper bound end keys are equal to or greater than the start key
+//    of the search range. The children followed will be equal to or less
+//    than the soft lower bound constraint.
+// 4. once the initial traversal completes and the scan is in the left-most
+//    btree node whose upper bound overlaps the search range, key comparisons
+//    must be performed with each cmd in the tree. This is necessary because
+//    any of these cmds may have end keys that cause them to overlap with the
+//    search range.
+// 5. once the scan reaches the lower bound constraint position (the first cmd
+//    with a start key equal to or greater than the search range's start key),
+//    it can begin scanning without performing key comparisons. This is allowed
+//    because all commands from this point forward will have end keys that are
+//    greater than the search range's start key.
+// 6. once the scan reaches the upper bound constraint position, it terminates.
+//    It does so because the cmd at this position is the first cmd with a start
+//    key larger than the search range's end key.
+type overlapScan struct {
+	c *cmd // search cmd
+
+	// The "soft" lower-bound constraint.
+	constrMinN       *node
+	constrMinPos     int16
+	constrMinReached bool
+
+	// The "hard" upper-bound constraint.
+	constrMaxN   *node
+	constrMaxPos int16
+}
+
+// FirstOverlap starts an overlap scan for the provided search cmd and seeks
+// to the first cmd in the btree that overlaps with it. If nothing overlaps
+// the iterator is left invalid.
+func (i *iterator) FirstOverlap(c *cmd) {
+	i.reset()
+	if i.n == nil {
+		return
+	}
+	i.pos, i.o = 0, overlapScan{c: c}
+	// Seed both constraints from the root before scanning.
+	i.constrainMinSearchBounds()
+	i.constrainMaxSearchBounds()
+	i.findNextOverlap()
+}
+
+// NextOverlap positions the iterator to the cmd immediately following
+// its current position that overlaps with the search cmd. The iterator is
+// invalidated if no overlap scan is in progress.
+func (i *iterator) NextOverlap() {
+	switch {
+	case i.n == nil:
+		return
+	case i.o.c == nil:
+		// Invalid. Mixed overlap scan with non-overlap scan.
+		i.pos = i.n.count
+	default:
+		i.pos++
+		i.findNextOverlap()
+	}
+}
+
+// constrainMinSearchBounds records the soft lower-bound constraint for the
+// current node: the position of its first cmd whose start key is equal to or
+// greater than the search cmd's start key (or count if there is none).
+func (i *iterator) constrainMinSearchBounds() {
+	start := i.o.c.span.Key
+	cur := i.n
+	idx := sort.Search(int(cur.count), func(j int) bool {
+		return bytes.Compare(cur.cmds[j].span.Key, start) >= 0
+	})
+	i.o.constrMinN, i.o.constrMinPos = cur, int16(idx)
+}
+
+// constrainMaxSearchBounds records the hard upper-bound constraint for the
+// current node: the position of its first cmd whose start key is not
+// contained by the search cmd's upper bound (or count if there is none).
+func (i *iterator) constrainMaxSearchBounds() {
+	bound := upperBound(i.o.c)
+	cur := i.n
+	idx := sort.Search(int(cur.count), func(j int) bool {
+		return !bound.contains(cur.cmds[j])
+	})
+	i.o.constrMaxN, i.o.constrMaxPos = cur, int16(idx)
+}
+
+// findNextOverlap advances the iterator from its current position to the next
+// cmd that overlaps the search cmd i.o.c and stops there. If the scan reaches
+// the upper-bound constraint position first, the iterator is left invalid
+// (i.pos == i.n.count).
+func (i *iterator) findNextOverlap() {
+	for {
+		if i.pos > i.n.count {
+			// Iterate up tree.
+			i.ascend()
+		} else if !i.n.leaf {
+			// Iterate down tree. Before the lower-bound constraint has been
+			// reached, a child is only worth visiting if its upper bound
+			// contains the search cmd's start key.
+			if i.o.constrMinReached || i.n.children[i.pos].max.contains(i.o.c) {
+				par := i.n
+				pos := i.pos
+				i.descend(par, pos)
+
+				// Refine the constraint bounds, if necessary.
+				if par == i.o.constrMinN && pos == i.o.constrMinPos {
+					i.constrainMinSearchBounds()
+				}
+				if par == i.o.constrMaxN && pos == i.o.constrMaxPos {
+					i.constrainMaxSearchBounds()
+				}
+				continue
+			}
+		}
+
+		// Check search bounds.
+		if i.n == i.o.constrMaxN && i.pos == i.o.constrMaxPos {
+			// Invalid. Past possible overlaps.
+			i.pos = i.n.count
+			return
+		}
+		if i.n == i.o.constrMinN && i.pos == i.o.constrMinPos {
+			// The scan reached the soft lower-bound constraint.
+			i.o.constrMinReached = true
+		}
+
+		// Iterate across node.
+		if i.pos < i.n.count {
+			// Check for overlapping cmd.
+			if i.o.constrMinReached {
+				// Fast-path to avoid span comparison. i.o.constrMinReached
+				// tells us that all cmds have end keys above our search
+				// span's start key.
+				return
+			}
+			if upperBound(i.n.cmds[i.pos]).contains(i.o.c) {
+				return
+			}
+		}
+		// No overlap at this position; move on. On the next pass this
+		// either descends into the child at the new position or ascends
+		// once pos has run past count.
+		i.pos++
+	}
+}
diff --git a/pkg/storage/cmdq/interval_btree_test.go b/pkg/storage/cmdq/interval_btree_test.go
new file mode 100644
index 000000000000..693787c5d86c
--- /dev/null
+++ b/pkg/storage/cmdq/interval_btree_test.go
@@ -0,0 +1,805 @@
+// Copyright 2018 The Cockroach Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+// implied. See the License for the specific language governing
+// permissions and limitations under the License.
+
+package cmdq
+
+import (
+	"fmt"
+	"math/rand"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+
+	"github.com/cockroachdb/cockroach/pkg/roachpb"
+	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
+)
+
+//////////////////////////////////////////
+//        Invariant verification        //
+//////////////////////////////////////////
+
+// Verify runs every structural invariant check against the tree. An empty
+// tree (nil root) trivially passes.
+func (t *btree) Verify(tt *testing.T) {
+	if t.root == nil {
+		return
+	}
+	checks := []func(*testing.T){
+		t.verifyLeafSameDepth,
+		t.verifyCountAllowed,
+		t.isSorted,
+		t.isUpperBoundCorrect,
+	}
+	for _, check := range checks {
+		check(tt)
+	}
+}
+
+// verifyLeafSameDepth asserts that every leaf sits at a depth equal to the
+// tree's height, counting the root as depth 1.
+func (t *btree) verifyLeafSameDepth(tt *testing.T) {
+	const rootDepth = 1
+	t.root.verifyDepthEqualToHeight(tt, rootDepth, t.Height())
+}
+
+// verifyDepthEqualToHeight walks the subtree rooted at n, which is at the
+// given depth, and asserts that each leaf's depth equals height.
+func (n *node) verifyDepthEqualToHeight(t *testing.T, depth, height int) {
+	if n.leaf {
+		require.Equal(t, height, depth, "all leaves should have the same depth as the tree height")
+		return
+	}
+	for pos := int16(0); pos <= n.count; pos++ {
+		n.children[pos].verifyDepthEqualToHeight(t, depth+1, height)
+	}
+}
+
+// verifyCountAllowed checks per-node cmd and child counts for the whole tree,
+// starting at the root.
+func (t *btree) verifyCountAllowed(tt *testing.T) {
+	const isRoot = true
+	t.root.verifyCountAllowed(tt, isRoot)
+}
+
+// verifyCountAllowed asserts that n holds an allowed number of cmds (the
+// [minCmds, maxCmds] range is not enforced for the root), that exactly the
+// first n.count cmd slots are non-nil, and, for interior nodes, that exactly
+// the first n.count+1 child slots are non-nil. It then checks each child.
+func (n *node) verifyCountAllowed(t *testing.T, root bool) {
+	if !root {
+		require.True(t, n.count >= minCmds, "cmd count %d must be in range [%d,%d]", n.count, minCmds, maxCmds)
+		require.True(t, n.count <= maxCmds, "cmd count %d must be in range [%d,%d]", n.count, minCmds, maxCmds)
+	}
+	used := int(n.count)
+	for idx, c := range n.cmds {
+		if idx >= used {
+			require.Nil(t, c, "cmd above count")
+			continue
+		}
+		require.NotNil(t, c, "cmd below count")
+	}
+	if !n.leaf {
+		for idx, ch := range n.children {
+			if idx > used {
+				require.Nil(t, ch, "node above count")
+				continue
+			}
+			require.NotNil(t, ch, "node below count")
+		}
+	}
+	n.recurse(func(ch *node, _ int16) {
+		ch.verifyCountAllowed(t, false)
+	})
+}
+
+// isSorted asserts that the cmds of the whole tree are in cmp order.
+func (t *btree) isSorted(tt *testing.T) {
+	root := t.root
+	root.isSorted(tt)
+}
+
+// isSorted asserts that the cmds inside n are in non-decreasing cmp order,
+// that every separator cmd of an interior node lies between the last cmd of
+// its left child and the first cmd of its right child, and that the same
+// holds for every child subtree.
+func (n *node) isSorted(t *testing.T) {
+	for i := int16(0); i+1 < n.count; i++ {
+		require.True(t, cmp(n.cmds[i], n.cmds[i+1]) <= 0)
+	}
+	if !n.leaf {
+		for i := int16(0); i < n.count; i++ {
+			left, sep, right := n.children[i], n.cmds[i], n.children[i+1]
+
+			require.True(t, cmp(left.cmds[left.count-1], sep) <= 0)
+			require.True(t, cmp(sep, right.cmds[0]) <= 0)
+		}
+	}
+	n.recurse(func(ch *node, _ int16) {
+		ch.isSorted(t)
+	})
+}
+
+// isUpperBoundCorrect asserts that the max upper bound cached on every node
+// of the tree is consistent, starting from the root.
+func (t *btree) isUpperBoundCorrect(tt *testing.T) {
+	root := t.root
+	root.isUpperBoundCorrect(tt)
+}
+
+// isUpperBoundCorrect asserts that n.max equals the bound computed by
+// n.findUpperBound, that no cmd stored in n and no child of n has an upper
+// bound greater than n.max, and that the same holds for every child subtree.
+func (n *node) isUpperBoundCorrect(t *testing.T) {
+	require.Equal(t, 0, n.findUpperBound().compare(n.max))
+	// Start at index 0 so the first cmd in the node is checked as well.
+	for i := int16(0); i < n.count; i++ {
+		require.True(t, upperBound(n.cmds[i]).compare(n.max) <= 0)
+	}
+	if !n.leaf {
+		for i := int16(0); i <= n.count; i++ {
+			child := n.children[i]
+			require.True(t, child.max.compare(n.max) <= 0)
+		}
+	}
+	n.recurse(func(child *node, _ int16) {
+		child.isUpperBoundCorrect(t)
+	})
+}
+
+// recurse invokes f once for each child of n, in position order, passing the
+// child and its position. It does nothing for leaf nodes.
+func (n *node) recurse(f func(child *node, pos int16)) {
+	if n.leaf {
+		return
+	}
+	for pos := int16(0); pos <= n.count; pos++ {
+		f(n.children[pos], pos)
+	}
+}
+
+//////////////////////////////////////////
+//              Unit Tests              //
+//////////////////////////////////////////
+
+// key returns i formatted as a zero-padded, five-digit decimal key. It panics
+// if i is outside [0, 99999].
+func key(i int) roachpb.Key {
+	const maxKey = 99999
+	if i < 0 || i > maxKey {
+		panic("key out of bounds")
+	}
+	return roachpb.Key(fmt.Sprintf("%05d", i))
+}
+
+// span returns a deterministic span starting at key(i). The shape depends on
+// i%10: 0 yields a span with no end key, 1 ends at key(i).Next(), 2 ends at
+// key(i+64), and every other value ends at key(i+4).
+func span(i int) roachpb.Span {
+	var end roachpb.Key
+	switch i % 10 {
+	case 0:
+		// No end key.
+	case 1:
+		end = key(i).Next()
+	case 2:
+		end = key(i + 64)
+	default:
+		end = key(i + 4)
+	}
+	return roachpb.Span{Key: key(i), EndKey: end}
+}
+
+// spanWithEnd returns the span [key(start), key(end)), or a span with only a
+// start key when start == end. It panics if start > end.
+func spanWithEnd(start, end int) roachpb.Span {
+	switch {
+	case start < end:
+		return roachpb.Span{Key: key(start), EndKey: key(end)}
+	case start == end:
+		return roachpb.Span{Key: key(start)}
+	}
+	panic("illegal span")
+}
+
+// randomSpan draws a start in [0, n) and an end in [0, n] from rng, swaps
+// them if they are out of order, and returns spanWithEnd of the pair.
+func randomSpan(rng *rand.Rand, n int) roachpb.Span {
+	lo := rng.Intn(n)
+	hi := rng.Intn(n + 1)
+	if hi < lo {
+		lo, hi = hi, lo
+	}
+	return spanWithEnd(lo, hi)
+}
+
+// newCmd returns a new cmd covering span s, with a zero id.
+func newCmd(s roachpb.Span) *cmd {
+	c := new(cmd)
+	c.span = s
+	return c
+}
+
+// checkIter asserts that it yields exactly the cmds span(start) through
+// span(end-1), in order, in three ways: a forward scan with First/Next, a
+// backward scan with Last/Prev, and an overlap scan with
+// FirstOverlap/NextOverlap over the span [start, end). The counter i is
+// shared across the three phases, so their order matters.
+func checkIter(t *testing.T, it iterator, start, end int) {
+	// Forward scan: i counts up from start and must finish at end.
+	i := start
+	for it.First(); it.Valid(); it.Next() {
+		cmd := it.Cmd()
+		expected := span(i)
+		if !expected.Equal(cmd.span) {
+			t.Fatalf("expected %s, but found %s", expected, cmd.span)
+		}
+		i++
+	}
+	if i != end {
+		t.Fatalf("expected %d, but at %d", end, i)
+	}
+
+	// Backward scan: i counts back down from end and must finish at start.
+	for it.Last(); it.Valid(); it.Prev() {
+		i--
+		cmd := it.Cmd()
+		expected := span(i)
+		if !expected.Equal(cmd.span) {
+			t.Fatalf("expected %s, but found %s", expected, cmd.span)
+		}
+	}
+	if i != start {
+		t.Fatalf("expected %d, but at %d: %+v", start, i, it)
+	}
+
+	// Overlap scan: i counts up from start again and must finish at end.
+	all := newCmd(spanWithEnd(start, end))
+	for it.FirstOverlap(all); it.Valid(); it.NextOverlap() {
+		cmd := it.Cmd()
+		expected := span(i)
+		if !expected.Equal(cmd.span) {
+			t.Fatalf("expected %s, but found %s", expected, cmd.span)
+		}
+		i++
+	}
+	if i != end {
+		t.Fatalf("expected %d, but at %d", end, i)
+	}
+}
+
+// TestBTree inserts and deletes count cmds in ascending and then descending
+// order. After every mutation it verifies the tree invariants, the reported
+// length, and the iteration results via checkIter.
+func TestBTree(t *testing.T) {
+	var tr btree
+
+	// With degree == 16 (max-items/node == 31) we need 513 items in order for
+	// there to be 3 levels in the tree. The count here is comfortably above
+	// that.
+	const count = 768
+
+	// Add keys in sorted order.
+	for i := 0; i < count; i++ {
+		tr.Set(newCmd(span(i)))
+		tr.Verify(t)
+		if e := i + 1; e != tr.Len() {
+			t.Fatalf("expected length %d, but found %d", e, tr.Len())
+		}
+		checkIter(t, tr.MakeIter(), 0, i+1)
+	}
+
+	// Delete keys in sorted order.
+	for i := 0; i < count; i++ {
+		tr.Delete(newCmd(span(i)))
+		tr.Verify(t)
+		if e := count - (i + 1); e != tr.Len() {
+			t.Fatalf("expected length %d, but found %d", e, tr.Len())
+		}
+		checkIter(t, tr.MakeIter(), i+1, count)
+	}
+
+	// Add keys in reverse sorted order.
+	// Note that this phase uses keys count down to 1, not count-1 down to 0.
+	for i := 0; i < count; i++ {
+		tr.Set(newCmd(span(count - i)))
+		tr.Verify(t)
+		if e := i + 1; e != tr.Len() {
+			t.Fatalf("expected length %d, but found %d", e, tr.Len())
+		}
+		checkIter(t, tr.MakeIter(), count-i, count+1)
+	}
+
+	// Delete keys in reverse sorted order.
+	for i := 0; i < count; i++ {
+		tr.Delete(newCmd(span(count - i)))
+		tr.Verify(t)
+		if e := count - (i + 1); e != tr.Len() {
+			t.Fatalf("expected length %d, but found %d", e, tr.Len())
+		}
+		checkIter(t, tr.MakeIter(), 1, count-i)
+	}
+}
+
+// TestBTreeSeek populates a tree with the cmds span(0), span(2), ...,
+// span(2*(count-1)) and checks that SeekGE and SeekLT land on the expected
+// neighbor for every index in between, and leave the iterator invalid when
+// seeking past either end.
+func TestBTreeSeek(t *testing.T) {
+	const count = 513
+
+	var tr btree
+	for i := 0; i < count; i++ {
+		tr.Set(newCmd(span(i * 2)))
+	}
+
+	it := tr.MakeIter()
+	for i := 0; i < 2*count-1; i++ {
+		it.SeekGE(newCmd(span(i)))
+		if !it.Valid() {
+			t.Fatalf("%d: expected valid iterator", i)
+		}
+		cmd := it.Cmd()
+		// The expected result is i rounded up to the nearest even index.
+		expected := span(2 * ((i + 1) / 2))
+		if !expected.Equal(cmd.span) {
+			t.Fatalf("%d: expected %s, but found %s", i, expected, cmd.span)
+		}
+	}
+	// Seeking past the largest inserted cmd must invalidate the iterator.
+	it.SeekGE(newCmd(span(2*count - 1)))
+	if it.Valid() {
+		t.Fatalf("expected invalid iterator")
+	}
+
+	for i := 1; i < 2*count; i++ {
+		it.SeekLT(newCmd(span(i)))
+		if !it.Valid() {
+			t.Fatalf("%d: expected valid iterator", i)
+		}
+		cmd := it.Cmd()
+		// The expected result is the largest even index strictly below i.
+		expected := span(2 * ((i - 1) / 2))
+		if !expected.Equal(cmd.span) {
+			t.Fatalf("%d: expected %s, but found %s", i, expected, cmd.span)
+		}
+	}
+	// Seeking before the smallest inserted cmd must invalidate the iterator.
+	it.SeekLT(newCmd(span(0)))
+	if it.Valid() {
+		t.Fatalf("expected invalid iterator")
+	}
+}
+
+// TestBTreeSeekOverlap populates a tree with count cmds, where cmd i covers
+// [key(i), key(i+size+1)), and checks that FirstOverlap/NextOverlap return
+// exactly the expected cmds, in order, for both point scans and range scans.
+func TestBTreeSeekOverlap(t *testing.T) {
+	const count = 513
+	const size = 2 * maxCmds
+
+	var tr btree
+	for i := 0; i < count; i++ {
+		tr.Set(newCmd(spanWithEnd(i, i+size+1)))
+	}
+
+	// Iterate over overlaps with a point scan.
+	it := tr.MakeIter()
+	for i := 0; i < count+size; i++ {
+		it.FirstOverlap(newCmd(spanWithEnd(i, i)))
+		// A point at i is expected to overlap the cmds starting in
+		// [i-size, i], clipped to the start keys that exist in the tree.
+		for j := 0; j < size+1; j++ {
+			expStart := i - size + j
+			if expStart < 0 {
+				continue
+			}
+			if expStart >= count {
+				continue
+			}
+
+			if !it.Valid() {
+				t.Fatalf("%d/%d: expected valid iterator", i, j)
+			}
+			cmd := it.Cmd()
+			expected := spanWithEnd(expStart, expStart+size+1)
+			if !expected.Equal(cmd.span) {
+				t.Fatalf("%d: expected %s, but found %s", i, expected, cmd.span)
+			}
+
+			it.NextOverlap()
+		}
+		// All expected overlaps were consumed; nothing else may follow.
+		if it.Valid() {
+			t.Fatalf("%d: expected invalid iterator %v", i, it.Cmd())
+		}
+	}
+	// A scan starting beyond every stored cmd must find nothing.
+	it.FirstOverlap(newCmd(span(count + size + 1)))
+	if it.Valid() {
+		t.Fatalf("expected invalid iterator")
+	}
+
+	// Iterate over overlaps with a range scan.
+	it = tr.MakeIter()
+	for i := 0; i < count+size; i++ {
+		it.FirstOverlap(newCmd(spanWithEnd(i, i+size+1)))
+		// A range [i, i+size+1) is expected to overlap the cmds starting in
+		// [i-size, i+size], clipped to the start keys that exist in the tree.
+		for j := 0; j < 2*size+1; j++ {
+			expStart := i - size + j
+			if expStart < 0 {
+				continue
+			}
+			if expStart >= count {
+				continue
+			}
+
+			if !it.Valid() {
+				t.Fatalf("%d/%d: expected valid iterator", i, j)
+			}
+			cmd := it.Cmd()
+			expected := spanWithEnd(expStart, expStart+size+1)
+			if !expected.Equal(cmd.span) {
+				t.Fatalf("%d: expected %s, but found %s", i, expected, cmd.span)
+			}
+
+			it.NextOverlap()
+		}
+		if it.Valid() {
+			t.Fatalf("%d: expected invalid iterator %v", i, it.Cmd())
+		}
+	}
+	it.FirstOverlap(newCmd(span(count + size + 1)))
+	if it.Valid() {
+		t.Fatalf("expected invalid iterator")
+	}
+}
+
+// TestBTreeSeekOverlapRandom builds trees of cmds with random end keys and
+// checks, for random scan spans, that an overlap scan returns as many cmds as
+// a brute-force comparison over all inserted cmds. The random seed is derived
+// from the current time and is included in the failure message so that a
+// failing run can be reproduced.
+func TestBTreeSeekOverlapRandom(t *testing.T) {
+	seed := timeutil.Now().UnixNano()
+	rng := rand.New(rand.NewSource(seed))
+
+	const trials = 10
+	for i := 0; i < trials; i++ {
+		var tr btree
+
+		// Insert one cmd per start key j. cmdSpans[j] records the cmd's
+		// inclusive end index, which equals j for point cmds.
+		const count = 1000
+		cmds := make([]*cmd, count)
+		cmdSpans := make([]int, count)
+		for j := 0; j < count; j++ {
+			var cmd *cmd
+			end := rng.Intn(count + 10)
+			if end <= j {
+				end = j
+				cmd = newCmd(spanWithEnd(j, end))
+			} else {
+				cmd = newCmd(spanWithEnd(j, end+1))
+			}
+			tr.Set(cmd)
+			cmds[j] = cmd
+			cmdSpans[j] = end
+		}
+
+		const scanTrials = 100
+		for j := 0; j < scanTrials; j++ {
+			// scanStart and scanEnd are inclusive bounds of the scan.
+			var scanCmd *cmd
+			scanStart := rng.Intn(count)
+			scanEnd := rng.Intn(count + 10)
+			if scanEnd <= scanStart {
+				scanEnd = scanStart
+				scanCmd = newCmd(spanWithEnd(scanStart, scanEnd))
+			} else {
+				scanCmd = newCmd(spanWithEnd(scanStart, scanEnd+1))
+			}
+
+			// Brute-force the expected overlaps using the inclusive bounds.
+			var exp, found []*cmd
+			for startKey, endKey := range cmdSpans {
+				if startKey <= scanEnd && endKey >= scanStart {
+					exp = append(exp, cmds[startKey])
+				}
+			}
+
+			it := tr.MakeIter()
+			it.FirstOverlap(scanCmd)
+			for it.Valid() {
+				found = append(found, it.Cmd())
+				it.NextOverlap()
+			}
+
+			require.Equal(t, len(exp), len(found), "search for %v (seed %d)", scanCmd.span, seed)
+		}
+	}
+}
+
+// TestBTreeCmp is a table-driven test of cmp. Each case builds two cmds from
+// a span and an id and asserts the expected ordering result (-1, 0 or 1).
+func TestBTreeCmp(t *testing.T) {
+	testCases := []struct {
+		spanA, spanB roachpb.Span
+		idA, idB     int64
+		exp          int
+	}{
+		{
+			spanA: roachpb.Span{Key: roachpb.Key("a")},
+			spanB: roachpb.Span{Key: roachpb.Key("a")},
+			idA:   1,
+			idB:   1,
+			exp:   0,
+		},
+		{
+			spanA: roachpb.Span{Key: roachpb.Key("a")},
+			spanB: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("b")},
+			idA:   1,
+			idB:   1,
+			exp:   -1,
+		},
+		{
+			spanA: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("c")},
+			spanB: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("b")},
+			idA:   1,
+			idB:   1,
+			exp:   1,
+		},
+		{
+			spanA: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("c")},
+			spanB: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("c")},
+			idA:   1,
+			idB:   1,
+			exp:   0,
+		},
+		{
+			spanA: roachpb.Span{Key: roachpb.Key("a")},
+			spanB: roachpb.Span{Key: roachpb.Key("a")},
+			idA:   1,
+			idB:   2,
+			exp:   -1,
+		},
+		{
+			spanA: roachpb.Span{Key: roachpb.Key("a")},
+			spanB: roachpb.Span{Key: roachpb.Key("a")},
+			idA:   2,
+			idB:   1,
+			exp:   1,
+		},
+		{
+			spanA: roachpb.Span{Key: roachpb.Key("b")},
+			spanB: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("c")},
+			idA:   1,
+			idB:   1,
+			exp:   1,
+		},
+		{
+			spanA: roachpb.Span{Key: roachpb.Key("b"), EndKey: roachpb.Key("e")},
+			spanB: roachpb.Span{Key: roachpb.Key("c"), EndKey: roachpb.Key("d")},
+			idA:   1,
+			idB:   1,
+			exp:   -1,
+		},
+	}
+	for _, tc := range testCases {
+		// Each case runs as a subtest named after its inputs.
+		name := fmt.Sprintf("cmp(%s:%d,%s:%d)", tc.spanA, tc.idA, tc.spanB, tc.idB)
+		t.Run(name, func(t *testing.T) {
+			cmdA := &cmd{id: tc.idA, span: tc.spanA}
+			cmdB := &cmd{id: tc.idB, span: tc.spanB}
+			require.Equal(t, tc.exp, cmp(cmdA, cmdB))
+		})
+	}
+}
+
+// TestIterStack pushes i frames onto an iterStack, checks the reported
+// length, pops them back in reverse order, and resets the stack. It repeats
+// this for every i from 1 up to twice the length of iterStackArr.
+func TestIterStack(t *testing.T) {
+	// f builds a frame that is identifiable by its position.
+	f := func(i int) iterFrame { return iterFrame{pos: int16(i)} }
+	var is iterStack
+	for i := 1; i <= 2*len(iterStackArr{}); i++ {
+		var j int
+		for j = 0; j < i; j++ {
+			is.push(f(j))
+		}
+		require.Equal(t, j, is.len())
+		// Frames must come back in last-in, first-out order.
+		for j--; j >= 0; j-- {
+			require.Equal(t, f(j), is.pop())
+		}
+		is.reset()
+	}
+}
+
+//////////////////////////////////////////
+//              Benchmarks              //
+//////////////////////////////////////////
+
+// perm returns n cmds, one for each unit-width span [key(i), key(i+1)) with
+// i in [0, n), ordered by a random permutation from rand.Perm.
+func perm(n int) (out []*cmd) {
+	for _, start := range rand.Perm(n) {
+		c := newCmd(spanWithEnd(start, start+1))
+		out = append(out, c)
+	}
+	return out
+}
+
+// forBenchmarkSizes runs f as a sub-benchmark named "count=<n>" for each of a
+// fixed set of tree sizes.
+func forBenchmarkSizes(b *testing.B, f func(b *testing.B, count int)) {
+	sizes := []int{16, 128, 1024, 8192, 65536}
+	for _, count := range sizes {
+		name := fmt.Sprintf("count=%d", count)
+		b.Run(name, func(b *testing.B) {
+			f(b, count)
+		})
+	}
+}
+
+// BenchmarkBTreeInsert measures Set on a tree that is filled with a random
+// permutation of count cmds. Each Set counts as one benchmark iteration; a
+// fresh tree is started whenever the permutation is exhausted.
+func BenchmarkBTreeInsert(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		insertP := perm(count)
+		b.ResetTimer()
+		// i is advanced inside the inner loop, once per Set.
+		for i := 0; i < b.N; {
+			var tr btree
+			for _, cmd := range insertP {
+				tr.Set(cmd)
+				i++
+				if i >= b.N {
+					return
+				}
+			}
+		}
+	})
+}
+
+// BenchmarkBTreeDelete measures Delete on a tree of count cmds. The tree is
+// rebuilt with the timer stopped, then emptied in a different random order
+// with the timer running. Each Delete counts as one benchmark iteration.
+func BenchmarkBTreeDelete(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		insertP, removeP := perm(count), perm(count)
+		b.ResetTimer()
+		// i is advanced inside the removal loop, once per Delete.
+		for i := 0; i < b.N; {
+			// Rebuilding the tree is setup and is excluded from the timing.
+			b.StopTimer()
+			var tr btree
+			for _, cmd := range insertP {
+				tr.Set(cmd)
+			}
+			b.StartTimer()
+			for _, cmd := range removeP {
+				tr.Delete(cmd)
+				i++
+				if i >= b.N {
+					return
+				}
+			}
+			// Every inserted cmd was deleted, so the tree must be empty.
+			if tr.Len() > 0 {
+				b.Fatalf("tree not empty: %s", &tr)
+			}
+		}
+	})
+}
+
+// BenchmarkBTreeDeleteInsert measures a Delete immediately followed by a Set
+// of the same cmd on a tree that holds count cmds, cycling through the cmds
+// in their insertion order.
+func BenchmarkBTreeDeleteInsert(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		cmds := perm(count)
+		var tr btree
+		for idx := range cmds {
+			tr.Set(cmds[idx])
+		}
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			c := cmds[i%count]
+			tr.Delete(c)
+			tr.Set(c)
+		}
+	})
+}
+
+// BenchmarkBTreeMakeIter measures creating an iterator over an empty tree and
+// calling First on it.
+func BenchmarkBTreeMakeIter(b *testing.B) {
+	var tr btree
+	for remaining := b.N; remaining > 0; remaining-- {
+		it := tr.MakeIter()
+		it.First()
+	}
+}
+
+// BenchmarkBTreeIterSeekGE measures SeekGE to a randomly chosen cmd that is
+// present in a tree of count cmds. The result is only validated when the
+// test binary runs in verbose mode.
+func BenchmarkBTreeIterSeekGE(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		var spans []roachpb.Span
+		var tr btree
+
+		for i := 0; i < count; i++ {
+			s := span(i)
+			spans = append(spans, s)
+			tr.Set(newCmd(s))
+		}
+
+		rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
+		it := tr.MakeIter()
+
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			s := spans[rng.Intn(len(spans))]
+			it.SeekGE(newCmd(s))
+			// Validation is gated on -v so the checks run only when asked for.
+			if testing.Verbose() {
+				if !it.Valid() {
+					b.Fatal("expected to find key")
+				}
+				if !s.Equal(it.Cmd().span) {
+					b.Fatalf("expected %s, but found %s", s, it.Cmd().span)
+				}
+			}
+		}
+	})
+}
+
+// BenchmarkBTreeIterSeekLT measures SeekLT to a randomly chosen cmd that is
+// present in a tree of count cmds. In verbose mode it also checks that the
+// iterator lands on the preceding cmd, or is invalid when the first cmd was
+// chosen.
+func BenchmarkBTreeIterSeekLT(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		var spans []roachpb.Span
+		var tr btree
+
+		for i := 0; i < count; i++ {
+			s := span(i)
+			spans = append(spans, s)
+			tr.Set(newCmd(s))
+		}
+
+		rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
+		it := tr.MakeIter()
+
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			j := rng.Intn(len(spans))
+			s := spans[j]
+			it.SeekLT(newCmd(s))
+			// Validation is gated on -v so the checks run only when asked for.
+			if testing.Verbose() {
+				if j == 0 {
+					// Nothing precedes the first cmd.
+					if it.Valid() {
+						b.Fatal("unexpected key")
+					}
+				} else {
+					if !it.Valid() {
+						b.Fatal("expected to find key")
+					}
+					s := spans[j-1]
+					if !s.Equal(it.Cmd().span) {
+						b.Fatalf("expected %s, but found %s", s, it.Cmd().span)
+					}
+				}
+			}
+		}
+	})
+}
+
+// BenchmarkBTreeIterFirstOverlap measures FirstOverlap for a randomly chosen
+// cmd that is present in a tree of count unit-width cmds. In verbose mode it
+// also checks that the first overlap found is the chosen cmd's own span.
+func BenchmarkBTreeIterFirstOverlap(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		var spans []roachpb.Span
+		var cmds []*cmd
+		var tr btree
+
+		for i := 0; i < count; i++ {
+			s := spanWithEnd(i, i+1)
+			spans = append(spans, s)
+			cmd := newCmd(s)
+			cmds = append(cmds, cmd)
+			tr.Set(cmd)
+		}
+
+		rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
+		it := tr.MakeIter()
+
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			j := rng.Intn(len(spans))
+			s := spans[j]
+			cmd := cmds[j]
+			it.FirstOverlap(cmd)
+			// Validation is gated on -v so the checks run only when asked for.
+			if testing.Verbose() {
+				if !it.Valid() {
+					b.Fatal("expected to find key")
+				}
+				if !s.Equal(it.Cmd().span) {
+					b.Fatalf("expected %s, but found %s", s, it.Cmd().span)
+				}
+			}
+		}
+	})
+}
+
+// BenchmarkBTreeIterNext measures Next over a tree of count cmds, where cmd i
+// covers [key(i), key(i+size+1)). The iterator is repositioned with First
+// whenever it is not valid.
+func BenchmarkBTreeIterNext(b *testing.B) {
+	var tr btree
+
+	const count = 8 << 10
+	const size = 2 * maxCmds
+	for i := 0; i < count; i++ {
+		cmd := newCmd(spanWithEnd(i, i+size+1))
+		tr.Set(cmd)
+	}
+
+	it := tr.MakeIter()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		// Restart from the front once the iterator runs off the end.
+		if !it.Valid() {
+			it.First()
+		}
+		it.Next()
+	}
+}
+
+// BenchmarkBTreeIterPrev measures Prev over a tree of count cmds, where cmd i
+// covers [key(i), key(i+size+1)). The iterator is repositioned with Last
+// whenever it is not valid, so that the benchmark walks the tree backwards
+// instead of stepping off the front on every other call.
+func BenchmarkBTreeIterPrev(b *testing.B) {
+	var tr btree
+
+	const count = 8 << 10
+	const size = 2 * maxCmds
+	for i := 0; i < count; i++ {
+		cmd := newCmd(spanWithEnd(i, i+size+1))
+		tr.Set(cmd)
+	}
+
+	it := tr.MakeIter()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		// Restart from the back once the iterator runs off the front.
+		if !it.Valid() {
+			it.Last()
+		}
+		it.Prev()
+	}
+}
+
+// BenchmarkBTreeIterNextOverlap measures NextOverlap over a tree of count
+// cmds, scanning with a cmd whose span is [key(0), key(count+1)). The scan is
+// restarted with FirstOverlap whenever the iterator is not valid.
+func BenchmarkBTreeIterNextOverlap(b *testing.B) {
+	var tr btree
+
+	const count = 8 << 10
+	const size = 2 * maxCmds
+	for i := 0; i < count; i++ {
+		cmd := newCmd(spanWithEnd(i, i+size+1))
+		tr.Set(cmd)
+	}
+
+	allCmd := newCmd(spanWithEnd(0, count+1))
+	it := tr.MakeIter()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		// Restart the overlap scan once it has been exhausted.
+		if !it.Valid() {
+			it.FirstOverlap(allCmd)
+		}
+		it.NextOverlap()
+	}
+}
+
+// BenchmarkBTreeIterOverlapScan measures a complete overlap scan, from
+// FirstOverlap until the iterator is no longer valid, for a random span per
+// iteration over a tree of count cmds. A single scan cmd is reused across
+// iterations.
+func BenchmarkBTreeIterOverlapScan(b *testing.B) {
+	var tr btree
+	rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
+
+	const count = 8 << 10
+	const size = 2 * maxCmds
+	for start := 0; start < count; start++ {
+		tr.Set(newCmd(spanWithEnd(start, start+size+1)))
+	}
+
+	scan := &cmd{}
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		scan.span = randomSpan(rng, count)
+		it := tr.MakeIter()
+		for it.FirstOverlap(scan); it.Valid(); it.NextOverlap() {
+		}
+	}
+}
diff --git a/pkg/util/interval/btree_based_interval.go b/pkg/util/interval/btree_based_interval.go
index b47eaaee39c0..8b464c0c79ad 100644
--- a/pkg/util/interval/btree_based_interval.go
+++ b/pkg/util/interval/btree_based_interval.go
@@ -12,7 +12,7 @@
 // implied. See the License for the specific language governing
 // permissions and limitations under the License.
 //
-// This code is based on: https://github.com/google/btree
+// This code is based on: https://github.com/google/btree.
 
 package interval
 
@@ -21,15 +21,62 @@ import (
 	"sort"
 
 	"github.com/cockroachdb/cockroach/pkg/util/log"
+	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
 )
 
 const (
 	// DefaultBTreeMinimumDegree is the default B-tree minimum degree. Benchmarks
 	// show that the interval tree performs best with this minimum degree.
 	DefaultBTreeMinimumDegree = 32
+	// DefaultBTreeFreeListSize is the default size of a B-tree's freelist.
+	DefaultBTreeFreeListSize = 32
 )
 
-var _ = newBTree
+// nilItems and nilChildren are fixed-length slices whose elements are all
+// zero values. The truncate methods copy from them to clear the entries they
+// drop.
+var (
+	nilItems    = make(items, 16)
+	nilChildren = make(children, 16)
+)
+
+// FreeList represents a free list of btree nodes. By default each
+// BTree has its own FreeList, but multiple BTrees can share the same
+// FreeList.
+// Two Btrees using the same freelist are safe for concurrent write access.
+type FreeList struct {
+	// mu is held by newNode and freeNode while they access freelist.
+	mu syncutil.Mutex
+	// freelist holds the nodes available for reuse. Its capacity, fixed by
+	// NewFreeList, bounds how many nodes are retained.
+	freelist []*node
+}
+
+// NewFreeList returns an empty free list that retains at most size nodes.
+func NewFreeList(size int) *FreeList {
+	f := new(FreeList)
+	f.freelist = make([]*node, 0, size)
+	return f
+}
+
+// newNode removes and returns the most recently freed node from the list. If
+// the list is empty it returns a newly allocated node instead.
+func (f *FreeList) newNode() (n *node) {
+	f.mu.Lock()
+	last := len(f.freelist) - 1
+	if last >= 0 {
+		n = f.freelist[last]
+		// Drop the list's reference to the node before shrinking.
+		f.freelist[last] = nil
+		f.freelist = f.freelist[:last]
+		f.mu.Unlock()
+		return n
+	}
+	f.mu.Unlock()
+	return new(node)
+}
+
+// freeNode offers n to the free list. It reports true if the node was stored
+// and false if the list was already at capacity and the node was discarded.
+func (f *FreeList) freeNode(n *node) (out bool) {
+	f.mu.Lock()
+	out = len(f.freelist) < cap(f.freelist)
+	if out {
+		f.freelist = append(f.freelist, n)
+	}
+	f.mu.Unlock()
+	return out
+}
 
 // newBTree creates a new interval tree with the given overlapper function and
 // the default B-tree minimum degree.
@@ -47,9 +94,11 @@ func newBTreeWithDegree(overlapper Overlapper, minimumDegree int) *btree {
 	if minimumDegree < 2 {
 		panic("bad minimum degree")
 	}
+	f := NewFreeList(DefaultBTreeFreeListSize)
 	return &btree{
-		MinimumDegree: minimumDegree,
-		Overlapper:    overlapper,
+		minimumDegree: minimumDegree,
+		overlapper:    overlapper,
+		cow:           &copyOnWriteContext{freelist: f},
 	}
 }
 
@@ -79,8 +128,8 @@ func (s *items) insertAt(index int, e Interface) {
 // back.
 func (s *items) removeAt(index int) Interface {
 	e := (*s)[index]
-	(*s)[index] = nil
 	copy((*s)[index:], (*s)[index+1:])
+	(*s)[len(*s)-1] = nil
 	*s = (*s)[:len(*s)-1]
 	return e
 }
@@ -94,6 +143,16 @@ func (s *items) pop() (out Interface) {
 	return
 }
 
+// truncate shortens the list to its first index items and sets the dropped
+// entries to nil. index must be less than or equal to length.
+func (s *items) truncate(index int) {
+	kept := (*s)[:index]
+	dropped := (*s)[index:]
+	*s = kept
+	// Clear in chunks of at most len(nilItems) entries.
+	for len(dropped) > 0 {
+		cleared := copy(dropped, nilItems)
+		dropped = dropped[cleared:]
+	}
+}
+
 // find returns the index where the given Interface should be inserted into this
 // list. 'found' is true if the interface already exists in the list at the
 // given index.
@@ -125,8 +184,8 @@ func (s *children) insertAt(index int, n *node) {
 // back.
 func (s *children) removeAt(index int) *node {
 	n := (*s)[index]
-	(*s)[index] = nil
 	copy((*s)[index:], (*s)[index+1:])
+	(*s)[len(*s)-1] = nil
 	*s = (*s)[:len(*s)-1]
 	return n
 }
@@ -140,6 +199,16 @@ func (s *children) pop() (out *node) {
 	return
 }
 
+// truncate shortens the list to its first index children and sets the
+// dropped entries to nil. index must be less than or equal to length.
+func (s *children) truncate(index int) {
+	kept := (*s)[:index]
+	dropped := (*s)[index:]
+	*s = kept
+	// Clear in chunks of at most len(nilChildren) entries.
+	for len(dropped) > 0 {
+		cleared := copy(dropped, nilChildren)
+		dropped = dropped[cleared:]
+	}
+}
+
 // node is an internal node in a tree.
 //
 // It must at all times maintain the invariant that either
@@ -155,7 +224,35 @@ type node struct {
 	Range    Range
 	items    items
 	children children
-	t        *btree
+	cow      *copyOnWriteContext
+}
+
+// mutableFor returns a node that the given context may modify. If n already
+// belongs to cow it is returned unchanged. Otherwise a node obtained from cow
+// is filled with a copy of n's Range, items and children and returned; n
+// itself is left untouched.
+func (n *node) mutableFor(cow *copyOnWriteContext) *node {
+	if n.cow == cow {
+		return n
+	}
+	clone := cow.newNode()
+	clone.Range = n.Range
+
+	// Reuse the obtained node's item storage when it is large enough.
+	if numItems := len(n.items); numItems <= cap(clone.items) {
+		clone.items = clone.items[:numItems]
+	} else {
+		clone.items = make(items, numItems, cap(n.items))
+	}
+	copy(clone.items, n.items)
+
+	// Likewise for the child pointers.
+	if numChildren := len(n.children); numChildren <= cap(clone.children) {
+		clone.children = clone.children[:numChildren]
+	} else {
+		clone.children = make(children, numChildren, cap(n.children))
+	}
+	copy(clone.children, n.children)
+	return clone
+}
+
+// mutableChild returns the i-th child of n in a form that n's context may
+// modify. The child slot is updated to point at the returned node, which is
+// a copy if the existing child belonged to a different context.
+func (n *node) mutableChild(i int) *node {
+	c := n.children[i].mutableFor(n.cow)
+	n.children[i] = c
+	return c
 }
 
 // split splits the given node at the given index. The current node shrinks, and
@@ -179,21 +276,19 @@ type node struct {
 //
 func (n *node) split(i int, fast bool) (Interface, *node) {
 	e := n.items[i]
-	second := n.t.newNode()
-	second.items = make(items, n.t.minItems())
-	copy(second.items, n.items[i+1:])
-	n.items = n.items[:i]
+	second := n.cow.newNode()
+	second.items = append(second.items, n.items[i+1:]...)
+	n.items.truncate(i)
 	if len(n.children) > 0 {
-		second.children = make(children, n.t.minItems()+1)
-		copy(second.children, n.children[i+1:])
-		n.children = n.children[:i+1]
+		second.children = append(second.children, n.children[i+1:]...)
+		n.children.truncate(i + 1)
 	}
 	if !fast {
 		// adjust range for the first split part
 		oldRangeEnd := n.Range.End
 		n.Range.End = n.rangeEnd()
 
-		// adjust ragne for the second split part
+		// adjust range for the second split part
 		second.Range.Start = second.rangeStart()
 		if n.Range.End.Equal(oldRangeEnd) || e.Range().End.Equal(oldRangeEnd) {
 			second.Range.End = second.rangeEnd()
@@ -206,12 +301,11 @@ func (n *node) split(i int, fast bool) (Interface, *node) {
 
 // maybeSplitChild checks if a child should be split, and if so splits it.
 // Returns whether or not a split occurred.
-func (n *node) maybeSplitChild(i int, fast bool) bool {
-	maxItems := n.t.maxItems()
+func (n *node) maybeSplitChild(i, maxItems int, fast bool) bool {
 	if len(n.children[i].items) < maxItems {
 		return false
 	}
-	first := n.children[i]
+	first := n.mutableChild(i)
 	e, second := first.split(maxItems/2, fast)
 	n.items.insertAt(i, e)
 	n.children.insertAt(i+1, second)
@@ -220,7 +314,7 @@ func (n *node) maybeSplitChild(i int, fast bool) bool {
 
 // insert inserts an Interface into the subtree rooted at this node, making sure
 // no nodes in the subtree exceed maxItems Interfaces.
-func (n *node) insert(e Interface, fast bool) (out Interface, extended bool) {
+func (n *node) insert(e Interface, maxItems int, fast bool) (out Interface, extended bool) {
 	i, found := n.items.find(e)
 	if found {
 		out = n.items[i]
@@ -242,7 +336,7 @@ func (n *node) insert(e Interface, fast bool) (out Interface, extended bool) {
 		}
 		return
 	}
-	if n.maybeSplitChild(i, fast) {
+	if n.maybeSplitChild(i, maxItems, fast) {
 		inTree := n.items[i]
 		switch Compare(e, inTree) {
 		case -1:
@@ -255,7 +349,7 @@ func (n *node) insert(e Interface, fast bool) (out Interface, extended bool) {
 			return
 		}
 	}
-	out, extended = n.children[i].insert(e, fast)
+	out, extended = n.mutableChild(i).insert(e, maxItems, fast)
 	if !fast && extended {
 		extended = false
 		if i == 0 && n.children[0].Range.Start.Compare(n.Range.Start) < 0 {
@@ -275,7 +369,7 @@ func (t *btree) isEmpty() bool {
 }
 
 func (t *btree) Get(r Range) (o []Interface) {
-	return t.GetWithOverlapper(r, t.Overlapper)
+	return t.GetWithOverlapper(r, t.overlapper)
 }
 
 func (t *btree) GetWithOverlapper(r Range, overlapper Overlapper) (o []Interface) {
@@ -296,11 +390,11 @@ func (t *btree) DoMatching(fn Operation, r Range) bool {
 	if !t.overlappable(r) {
 		return false
 	}
-	return t.root.doMatch(fn, r, t.Overlapper)
+	return t.root.doMatch(fn, r, t.overlapper)
 }
 
 func (t *btree) overlappable(r Range) bool {
-	if t.isEmpty() || !t.Overlapper.Overlap(r, t.root.Range) {
+	if t.isEmpty() || !t.overlapper.Overlap(r, t.root.Range) {
 		return false
 	}
 	return true
@@ -464,11 +558,11 @@ func (n *node) remove(
 		panic("invalid remove type")
 	}
 	// If we get to here, we have children.
-	child := n.children[i]
-	if len(child.items) <= minItems {
+	if len(n.children[i].items) <= minItems {
 		out, shrunk = n.growChildAndRemove(i, e, minItems, typ, fast)
 		return
 	}
+	child := n.mutableChild(i)
 	// Either we had enough interfaces to begin with, or we've done some
 	// merging/stealing, because we've got enough now and we're ready to return
 	// stuff.
@@ -603,8 +697,8 @@ func (n *node) growChildAndRemove(
 //
 func (n *node) stealFromLeftChild(i int, fast bool) {
 	// steal
-	stealTo := n.children[i]
-	stealFrom := n.children[i-1]
+	stealTo := n.mutableChild(i)
+	stealFrom := n.mutableChild(i - 1)
 	x := stealFrom.items.pop()
 	y := n.items[i-1]
 	stealTo.items.insertAt(0, y)
@@ -660,8 +754,8 @@ func (n *node) stealFromLeftChild(i int, fast bool) {
 //
 func (n *node) stealFromRightChild(i int, fast bool) {
 	// steal
-	stealTo := n.children[i]
-	stealFrom := n.children[i+1]
+	stealTo := n.mutableChild(i)
+	stealFrom := n.mutableChild(i + 1)
 	x := stealFrom.items.removeAt(0)
 	y := n.items[i]
 	stealTo.items = append(stealTo.items, y)
@@ -711,21 +805,22 @@ func (n *node) stealFromRightChild(i int, fast bool) {
 //
 func (n *node) mergeWithRightChild(i int, fast bool) {
 	// merge
-	y := n.items.removeAt(i)
-	child := n.children[i]
+	child := n.mutableChild(i)
+	mergeItem := n.items.removeAt(i)
 	mergeChild := n.children.removeAt(i + 1)
-	child.items = append(child.items, y)
+	child.items = append(child.items, mergeItem)
 	child.items = append(child.items, mergeChild.items...)
 	child.children = append(child.children, mergeChild.children...)
 
 	if !fast {
-		if y.Range().End.Compare(child.Range.End) > 0 {
-			child.Range.End = y.Range().End
+		if mergeItem.Range().End.Compare(child.Range.End) > 0 {
+			child.Range.End = mergeItem.Range().End
 		}
 		if mergeChild.Range.End.Compare(child.Range.End) > 0 {
 			child.Range.End = mergeChild.Range.End
 		}
 	}
+	n.cow.freeNode(mergeChild)
 }
 
 var _ Tree = (*btree)(nil)
@@ -738,10 +833,58 @@ var _ Tree = (*btree)(nil)
 // Write operations are not safe for concurrent mutation by multiple
 // goroutines, but Read operations are.
 type btree struct {
-	root          *node
 	length        int
-	Overlapper    Overlapper
-	MinimumDegree int
+	minimumDegree int
+	overlapper    Overlapper
+	root          *node
+	cow           *copyOnWriteContext
+}
+
+// copyOnWriteContext pointers determine node ownership. A tree with a write
+// context equivalent to a node's write context is allowed to modify that node.
+// A tree whose write context does not match a node's is not allowed to modify
+// it, and must create a new, writable copy (i.e. it's a Clone).
+//
+// When doing any write operation, we maintain the invariant that the current
+// node's context is equal to the context of the tree that requested the write.
+// We do this by, before we descend into any node, creating a copy with the
+// correct context if the contexts don't match.
+//
+// Since the node we're currently visiting on any write has the requesting
+// tree's context, that node is modifiable in place.  Children of that node may
+// not share context, but before we descend into them, we'll make a mutable
+// copy.
+type copyOnWriteContext struct {
+	// freelist supplies nodes for newNode and receives the nodes released by
+	// freeNode.
+	freelist *FreeList
+}
+
+// cloneInternal clones the btree, lazily.  Clone should not be called concurrently,
+// but the original tree (t) and the new tree (t2) can be used concurrently
+// once the Clone call completes.
+//
+// The internal tree structure of t is marked read-only and shared between t and
+// t2.  Writes to both t and t2 use copy-on-write logic, creating new nodes
+// whenever one of t's original nodes would have been modified.  Read operations
+// should have no performance degradation.  Write operations for both t and t2
+// will initially experience minor slow-downs caused by additional allocs and
+// copies due to the aforementioned copy-on-write logic, but should converge to
+// the original performance characteristics of the original tree.
+func (t *btree) cloneInternal() (t2 *btree) {
+	// Create two entirely new copy-on-write contexts.
+	// This operation effectively creates three trees:
+	//   the original, shared nodes (old t.cow)
+	//   the new t.cow nodes
+	//   the new out.cow nodes
+	// Both new contexts are value copies of the old one, so they keep
+	// pointing at the same FreeList.
+	cow1, cow2 := *t.cow, *t.cow
+	out := *t
+	t.cow = &cow1
+	out.cow = &cow2
+	return &out
+}
+
+// Clone clones the btree, lazily. It returns the result of cloneInternal as
+// a Tree.
+func (t *btree) Clone() Tree {
+	return t.cloneInternal()
 }
 
 // adjustRange sets the Range to the maximum extent of the childrens' Range
@@ -791,32 +934,62 @@ func (t *btree) AdjustRanges() {
 	if t.isEmpty() {
 		return
 	}
-	t.root.adjustRanges()
+	t.root.adjustRanges(t.root.cow)
 }
 
-func (n *node) adjustRanges() {
-	for _, c := range n.children {
-		c.adjustRanges()
+func (n *node) adjustRanges(c *copyOnWriteContext) {
+	if n.cow != c {
+		// Could not have been modified.
+		return
+	}
+	for _, child := range n.children {
+		child.adjustRanges(c)
 	}
 	n.adjustRange()
 }
 
 // maxItems returns the max number of Interfaces to allow per node.
 func (t *btree) maxItems() int {
-	return t.MinimumDegree*2 - 1
+	return t.minimumDegree*2 - 1
 }
 
 // minItems returns the min number of Interfaces to allow per node (ignored
 // for the root node).
 func (t *btree) minItems() int {
-	return t.MinimumDegree - 1
+	return t.minimumDegree - 1
 }
 
-func (t *btree) newNode() (n *node) {
-	n = &node{t: t}
+func (c *copyOnWriteContext) newNode() (n *node) {
+	n = c.freelist.newNode()
+	n.cow = c
 	return
 }
 
+type freeType int
+
+const (
+	ftFreelistFull freeType = iota // node was freed (available for GC, not stored in freelist)
+	ftStored                       // node was stored in the freelist for later use
+	ftNotOwned                     // node was ignored by COW, since it's owned by another one
+)
+
+// freeNode frees a node within a given COW context, if it's owned by that
+// context.  It returns what happened to the node (see freeType const
+// documentation).
+func (c *copyOnWriteContext) freeNode(n *node) freeType {
+	if n.cow == c {
+		// clear to allow GC
+		n.items.truncate(0)
+		n.children.truncate(0)
+		n.cow = nil // clear to allow GC
+		if c.freelist.freeNode(n) {
+			return ftStored
+		}
+		return ftFreelistFull
+	}
+	return ftNotOwned
+}
+
 func (t *btree) Insert(e Interface, fast bool) (err error) {
 	// t.metrics("Insert")
 	if err = isValidInterface(e); err != nil {
@@ -824,7 +997,7 @@ func (t *btree) Insert(e Interface, fast bool) (err error) {
 	}
 
 	if t.root == nil {
-		t.root = t.newNode()
+		t.root = t.cow.newNode()
 		t.root.items = append(t.root.items, e)
 		t.length++
 		if !fast {
@@ -832,9 +1005,12 @@ func (t *btree) Insert(e Interface, fast bool) (err error) {
 			t.root.Range.End = e.Range().End
 		}
 		return nil
-	} else if len(t.root.items) >= t.maxItems() {
+	}
+
+	t.root = t.root.mutableFor(t.cow)
+	if len(t.root.items) >= t.maxItems() {
 		oldroot := t.root
-		t.root = t.newNode()
+		t.root = t.cow.newNode()
 		if !fast {
 			t.root.Range.Start = oldroot.Range.Start
 			t.root.Range.End = oldroot.Range.End
@@ -843,7 +1019,8 @@ func (t *btree) Insert(e Interface, fast bool) (err error) {
 		t.root.items = append(t.root.items, e2)
 		t.root.children = append(t.root.children, oldroot, second)
 	}
-	out, _ := t.root.insert(e, fast)
+
+	out, _ := t.root.insert(e, t.maxItems(), fast)
 	if out == nil {
 		t.length++
 	}
@@ -863,9 +1040,12 @@ func (t *btree) Delete(e Interface, fast bool) (err error) {
 }
 
 func (t *btree) delete(e Interface, typ toRemove, fast bool) Interface {
+	t.root = t.root.mutableFor(t.cow)
 	out, _ := t.root.remove(e, t.minItems(), typ, fast)
 	if len(t.root.items) == 0 && len(t.root.children) > 0 {
+		oldroot := t.root
 		t.root = t.root.children[0]
+		t.cow.freeNode(oldroot)
 	}
 	if out != nil {
 		t.length--
@@ -918,7 +1098,47 @@ func (t *btree) Iterator() TreeIterator {
 	return &ti
 }
 
+// ClearWithOpt removes all items from the btree.  If addNodesToFreelist is
+// true, t's nodes are added to its freelist as part of this call, until the
+// freelist is full.  Otherwise, the root node is simply dereferenced and the
+// subtree left to Go's normal GC processes.
+//
+// This can be much faster than calling Delete on all elements, because that
+// requires finding/removing each element in the tree and updating the tree
+// accordingly.  It also is somewhat faster than creating a new tree to replace
+// the old one, because nodes from the old tree are reclaimed into the freelist
+// for use by the new one, instead of being lost to the garbage collector.
+//
+// This call takes:
+//   O(1): when addNodesToFreelist is false, this is a single operation.
+//   O(1): when the freelist is already full, it breaks out immediately
+//   O(freelist size):  when the freelist is empty and the nodes are all owned
+//       by this tree, nodes are added to the freelist until full.
+//   O(tree size):  when all nodes are owned by another tree, all nodes are
+//       iterated over looking for nodes to add to the freelist, and due to
+//       ownership, none are.
+func (t *btree) ClearWithOpt(addNodesToFreelist bool) {
+	if t.root != nil && addNodesToFreelist {
+		t.root.reset(t.cow)
+	}
+	t.root, t.length = nil, 0
+}
+
 func (t *btree) Clear() {
-	t.root = nil
-	t.length = 0
+	t.ClearWithOpt(true)
+}
+
+// reset returns a subtree to the freelist.  It breaks out immediately if the
+// freelist is full, since the only benefit of iterating is to fill that
+// freelist up.  Returns true if parent reset call should continue.
+func (n *node) reset(c *copyOnWriteContext) bool {
+	if n.cow != c {
+		return false
+	}
+	for _, child := range n.children {
+		if !child.reset(c) {
+			return false
+		}
+	}
+	return c.freeNode(n) != ftFreelistFull
 }
diff --git a/pkg/util/interval/btree_based_interval_test.go b/pkg/util/interval/btree_based_interval_test.go
index 387f85ff58d6..ab518f2a543c 100644
--- a/pkg/util/interval/btree_based_interval_test.go
+++ b/pkg/util/interval/btree_based_interval_test.go
@@ -25,6 +25,7 @@ import (
 	"testing"
 
 	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
+	"golang.org/x/sync/errgroup"
 )
 
 var btreeMinDegree = flag.Int("btree_min_degree", DefaultBTreeMinimumDegree, "B-Tree minimum degree")
@@ -54,6 +55,15 @@ func rang(m, n uint32) (out items) {
 	return
 }
 
+// all extracts all items from a tree in order as a slice.
+func all(t *btree) (out items) {
+	t.Do(func(a Interface) bool {
+		out = append(out, a)
+		return false
+	})
+	return
+}
+
 func makeMultiByteInterval(start, end, id uint32) *Interval {
 	return &Interval{Range{toBytes(start), toBytes(end)}, uintptr(id)}
 }
@@ -479,7 +489,7 @@ func TestBTree(t *testing.T) {
 		}
 
 		if len := tree.Len(); len > 0 {
-			t.Fatalf("expected 0 item, got %d itemes", len)
+			t.Fatalf("expected 0 item, got %d items", len)
 		}
 	}
 }
@@ -597,6 +607,89 @@ func TestLargeTree(t *testing.T) {
 	checkFastDelete(t, tree, ivs, 10)
 }
 
+const cloneTestSize = 10000
+
+func cloneTest(
+	t *testing.T, b *btree, start int, p items, g *errgroup.Group, treeC chan *btree,
+) error {
+	t.Logf("Starting new clone at %v", start)
+	treeC <- b
+	for i := start; i < cloneTestSize; i++ {
+		if err := b.Insert(p[i], false); err != nil {
+			return err
+		}
+		if i%(cloneTestSize/5) == 0 {
+			i := i
+			c := b.cloneInternal()
+			g.Go(func() error {
+				return cloneTest(t, c, i+1, p, g, treeC)
+			})
+		}
+	}
+	return nil
+}
+
+func TestCloneConcurrentOperations(t *testing.T) {
+	var trees []*btree
+	treeC, treeDone := make(chan *btree), make(chan struct{})
+	go func() {
+		for b := range treeC {
+			trees = append(trees, b)
+		}
+		close(treeDone)
+	}()
+
+	var g errgroup.Group
+	b := newBTree(InclusiveOverlapper)
+	p := perm(cloneTestSize)
+	g.Go(func() error {
+		return cloneTest(t, b, 0, p, &g, treeC)
+	})
+	if err := g.Wait(); err != nil {
+		t.Fatal(err)
+	}
+	close(treeC)
+	<-treeDone
+
+	want := rang(0, cloneTestSize-1)
+	t.Logf("Starting equality checks on %d trees", len(trees))
+	for i, tree := range trees {
+		if !reflect.DeepEqual(want, all(tree)) {
+			t.Errorf("tree %v mismatch", i)
+		}
+	}
+
+	t.Log("Removing half from first half")
+	toRemove := want[cloneTestSize/2:]
+	for i := 0; i < len(trees)/2; i++ {
+		tree := trees[i]
+		g.Go(func() error {
+			for _, item := range toRemove {
+				if err := tree.Delete(item, false); err != nil {
+					return err
+				}
+			}
+			return nil
+		})
+	}
+	if err := g.Wait(); err != nil {
+		t.Fatal(err)
+	}
+
+	t.Log("Checking all values again")
+	for i, tree := range trees {
+		var wantpart items
+		if i < len(trees)/2 {
+			wantpart = want[:cloneTestSize/2]
+		} else {
+			wantpart = want
+		}
+		if got := all(tree); !reflect.DeepEqual(wantpart, got) {
+			t.Errorf("tree %v mismatch, want %v got %v", i, len(wantpart), len(got))
+		}
+	}
+}
+
 func TestIterator(t *testing.T) {
 	var ivs items
 	const treeSize = 400
@@ -613,3 +706,218 @@ func TestIterator(t *testing.T) {
 	tree.AdjustRanges()
 	checkIterator(t, tree, ivs)
 }
+
+func forBenchmarkSizes(b *testing.B, f func(b *testing.B, count int)) {
+	for _, count := range []int{16, 128, 1024, 8192, 65536} {
+		b.Run(fmt.Sprintf("count=%d", count), func(b *testing.B) {
+			f(b, count)
+		})
+	}
+}
+
+func BenchmarkBTreeInsert(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		insertP := perm(uint32(count))
+		b.ResetTimer()
+		i := 0
+		for i < b.N {
+			tr := newBTree(InclusiveOverlapper)
+			for _, item := range insertP {
+				if err := tr.Insert(item, false); err != nil {
+					b.Fatal(err)
+				}
+				i++
+				if i >= b.N {
+					return
+				}
+			}
+		}
+	})
+}
+
+func BenchmarkBTreeDelete(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		insertP, removeP := perm(uint32(count)), perm(uint32(count))
+		b.ResetTimer()
+		i := 0
+		for i < b.N {
+			b.StopTimer()
+			tr := newBTree(InclusiveOverlapper)
+			for _, item := range insertP {
+				if err := tr.Insert(item, false); err != nil {
+					b.Fatal(err)
+				}
+			}
+			b.StartTimer()
+			for _, item := range removeP {
+				if err := tr.Delete(item, false); err != nil {
+					b.Fatal(err)
+				}
+				i++
+				if i >= b.N {
+					return
+				}
+			}
+			if tr.Len() > 0 {
+				panic(tr.Len())
+			}
+		}
+	})
+}
+
+func BenchmarkBTreeDeleteInsert(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		insertP := perm(uint32(count))
+		tr := newBTree(InclusiveOverlapper)
+		for _, item := range insertP {
+			if err := tr.Insert(item, false); err != nil {
+				b.Fatal(err)
+			}
+		}
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			if err := tr.Delete(insertP[i%count], false); err != nil {
+				b.Fatal(err)
+			}
+			if err := tr.Insert(insertP[i%count], false); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+}
+
+func BenchmarkBTreeDeleteInsertCloneOnce(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		insertP := perm(uint32(count))
+		tr := newBTree(InclusiveOverlapper)
+		for _, item := range insertP {
+			if err := tr.Insert(item, false); err != nil {
+				b.Fatal(err)
+			}
+		}
+		tr = tr.cloneInternal()
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			if err := tr.Delete(insertP[i%count], false); err != nil {
+				b.Fatal(err)
+			}
+			if err := tr.Insert(insertP[i%count], false); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+}
+
+func BenchmarkBTreeDeleteInsertCloneEachTime(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		insertP := perm(uint32(count))
+		tr := newBTree(InclusiveOverlapper)
+		for _, item := range insertP {
+			if err := tr.Insert(item, false); err != nil {
+				b.Fatal(err)
+			}
+		}
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			tr = tr.cloneInternal()
+			if err := tr.Delete(insertP[i%count], false); err != nil {
+				b.Fatal(err)
+			}
+			if err := tr.Insert(insertP[i%count], false); err != nil {
+				b.Fatal(err)
+			}
+		}
+	})
+}
+
+func BenchmarkBTreeGet(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		insertP := perm(uint32(count))
+		removeP := perm(uint32(count))
+		b.ResetTimer()
+		i := 0
+		for i < b.N {
+			b.StopTimer()
+			tr := newBTree(InclusiveOverlapper)
+			for _, item := range insertP {
+				if err := tr.Insert(item, false); err != nil {
+					b.Fatal(err)
+				}
+			}
+			b.StartTimer()
+			for _, item := range removeP {
+				tr.Get(item.Range())
+				i++
+				if i >= b.N {
+					return
+				}
+			}
+		}
+	})
+}
+
+func BenchmarkBTreeGetCloneEachTime(b *testing.B) {
+	forBenchmarkSizes(b, func(b *testing.B, count int) {
+		insertP := perm(uint32(count))
+		removeP := perm(uint32(count))
+		b.ResetTimer()
+		i := 0
+		for i < b.N {
+			b.StopTimer()
+			tr := newBTree(InclusiveOverlapper)
+			for _, v := range insertP {
+				if err := tr.Insert(v, false); err != nil {
+					b.Fatal(err)
+				}
+			}
+			b.StartTimer()
+			for _, item := range removeP {
+				tr = tr.cloneInternal()
+				tr.Get(item.Range())
+				i++
+				if i >= b.N {
+					return
+				}
+			}
+		}
+	})
+}
+
+func key(i int) Comparable {
+	return []byte(fmt.Sprintf("%04d", i))
+}
+
+func rangeWithEnd(start, end int) Range {
+	return Range{Start: key(start), End: key(end)}
+}
+
+func randomRange(rng *rand.Rand, n int) Range {
+	start := rng.Intn(n)
+	end := rng.Intn(n + 1)
+	if end < start {
+		start, end = end, start
+	}
+	return rangeWithEnd(start, end)
+}
+
+func BenchmarkBTreeOverlapScan(b *testing.B) {
+	tr := newBTree(InclusiveOverlapper)
+	rng := rand.New(rand.NewSource(timeutil.Now().UnixNano()))
+
+	const count = 8 << 10
+	const size = 2 * 31
+	for i := 0; i < count; i++ {
+		iv := &Interval{rangeWithEnd(i, i+size+1), uintptr(i)}
+		if err := tr.Insert(iv, false); err != nil {
+			b.Fatal(err)
+		}
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		cmd := randomRange(rng, count)
+		tr.DoMatching(func(e Interface) bool {
+			return false
+		}, cmd)
+	}
+}
diff --git a/pkg/util/interval/interval.go b/pkg/util/interval/interval.go
index ceef00be3730..67a6ca06c0c7 100644
--- a/pkg/util/interval/interval.go
+++ b/pkg/util/interval/interval.go
@@ -154,7 +154,7 @@ func Compare(a, b Interface) int {
 // former has measurably better performance than the latter. So Equal should be used when only
 // equality state is needed.
 func Equal(a, b Interface) bool {
-	return a.Range().Start.Equal(b.Range().Start) && a.ID() == b.ID()
+	return a.ID() == b.ID() && a.Range().Start.Equal(b.Range().Start)
 }
 
 // A Comparable is a type that describes the ends of a Range.
@@ -224,6 +224,8 @@ type Tree interface {
 	Iterator() TreeIterator
 	// Clear this tree.
 	Clear()
+	// Clone clones the tree, returning a copy.
+	Clone() Tree
 }
 
 // TreeIterator iterates over all intervals stored in the interval tree, in-order.
@@ -234,7 +236,7 @@ type TreeIterator interface {
 	Next() (Interface, bool)
 }
 
-var useBTreeImpl = envutil.EnvOrDefaultBool("COCKROACH_INTERVAL_BTREE", false)
+var useBTreeImpl = envutil.EnvOrDefaultBool("COCKROACH_INTERVAL_BTREE", true)
 
 // NewTree creates a new interval tree with the given overlapper function. It
 // uses the augmented Left-Leaning Red Black tree implementation.
diff --git a/pkg/util/interval/llrb_based_interval.go b/pkg/util/interval/llrb_based_interval.go
index cfea42c883ba..e2a176959e70 100644
--- a/pkg/util/interval/llrb_based_interval.go
+++ b/pkg/util/interval/llrb_based_interval.go
@@ -676,3 +676,7 @@ func (t *llrbTree) Clear() {
 	t.Root = nil
 	t.Count = 0
 }
+
+func (t *llrbTree) Clone() Tree {
+	panic("unimplemented")
+}